mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-20 22:39:03 +00:00
Synchronise with Khronos-private Gitlab branch
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
This commit is contained in:
@@ -1,22 +1,22 @@
|
||||
# Jam build description for the test_profiling conformance test
# (presumably Boost.Build -- confirm against the project's build setup).
# - project requirements: gcc gets -xc++ and msvc gets /TP, i.e. the .c
#   sources listed below are compiled as C++.
# - exe test_profiling: the executable and the sources it is built from.
# - install dist: copies test_profiling to a per-variant (debug/release)
#   location under $(DIST).
project
|
||||
: requirements
|
||||
<toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
exe test_profiling
|
||||
: copy.c
|
||||
execute.c
|
||||
execute_multipass.c
|
||||
main.c
|
||||
readArray.c
|
||||
readImage.c
|
||||
writeArray.c
|
||||
writeImage.c
|
||||
;
|
||||
|
||||
install dist
|
||||
: test_profiling
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/profiling
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/profiling
|
||||
;
|
||||
# Jam build description for the test_profiling conformance test
# (presumably Boost.Build -- confirm against the project's build setup).
# - project requirements: gcc gets -xc++ and msvc gets /TP, i.e. the .c
#   sources listed below are compiled as C++.
# - exe test_profiling: the executable and the sources it is built from.
# - install dist: copies test_profiling to a per-variant (debug/release)
#   location under $(DIST).
project
|
||||
: requirements
|
||||
<toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
exe test_profiling
|
||||
: copy.c
|
||||
execute.c
|
||||
execute_multipass.c
|
||||
main.c
|
||||
readArray.c
|
||||
readImage.c
|
||||
writeArray.c
|
||||
writeImage.c
|
||||
;
|
||||
|
||||
install dist
|
||||
: test_profiling
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/profiling
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/profiling
|
||||
;
|
||||
|
||||
@@ -1,45 +1,45 @@
|
||||
# Makefile for the test_profiling OpenCL conformance test (links against the
# macOS OpenCL/OpenGL/GLUT/AppKit frameworks).
#
# Optional input: define BUILD_WITH_ATF to link the ATF framework and compile
# with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources plus the shared harness sources they depend on.
SRCS = main.c readArray.c writeArray.c readImage.c writeImage.c copy.c execute.c execute_multipass.c \
        ../../test_common/harness/testHarness.c \
        ../../test_common/harness/errorHelpers.c \
        ../../test_common/harness/typeWrappers.cpp \
        ../../test_common/harness/imageHelpers.cpp \
        ../../test_common/harness/mt19937.c \
        ../../test_common/harness/conversions.c \
        ../../test_common/harness/kernelHelpers.c

SOURCES = $(abspath $(SRCS))

LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.

FRAMEWORK = ${SOURCES}
HEADERS =
TARGET = test_profiling
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
#COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS
# The C sources are built with the C++ driver.
CC = c++
CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c and .cpp source to its object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all/clean name actions, not files; keep make from being confused by a
# file that happens to carry one of those names.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target without a rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
# Makefile for the test_profiling OpenCL conformance test (links against the
# macOS OpenCL/OpenGL/GLUT/AppKit frameworks).
#
# Optional input: define BUILD_WITH_ATF to link the ATF framework and compile
# with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources plus the shared harness sources they depend on.
SRCS = main.c readArray.c writeArray.c readImage.c writeImage.c copy.c execute.c execute_multipass.c \
        ../../test_common/harness/testHarness.c \
        ../../test_common/harness/errorHelpers.c \
        ../../test_common/harness/typeWrappers.cpp \
        ../../test_common/harness/imageHelpers.cpp \
        ../../test_common/harness/mt19937.c \
        ../../test_common/harness/conversions.c \
        ../../test_common/harness/kernelHelpers.c

SOURCES = $(abspath $(SRCS))

LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.

FRAMEWORK = ${SOURCES}
HEADERS =
TARGET = test_profiling
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
#COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS
# The C sources are built with the C++ driver.
CC = c++
CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c and .cpp source to its object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all/clean name actions, not files; keep make from being confused by a
# file that happens to carry one of those names.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target without a rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,445 +1,445 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#ifndef uchar
|
||||
typedef unsigned char uchar;
|
||||
#endif
|
||||
|
||||
#undef MIN
|
||||
#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
|
||||
|
||||
#undef MAX
|
||||
#define MAX(x,y) ( (x) > (y) ? (x) : (y) )
|
||||
|
||||
//#define CREATE_OUTPUT 1
|
||||
|
||||
extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
|
||||
|
||||
|
||||
|
||||
//--- the code for kernel executables
|
||||
// OpenCL C source of the "image_filter" kernel, handed to the kernel builder
// at run time. Each work-item (tid_x, tid_y) walks an n-wide by m-high window
// centred on its own pixel, adds up weight * read_imagef() for every non-zero
// entry of filter_weights (consumed in row-major order via indx), and writes
// the sum to the same coordinate of dst_image. The sampler uses
// clamp-to-edge addressing with nearest filtering.
static const char *image_filter_src =
|
||||
"constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
|
||||
"\n"
|
||||
"__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
|
||||
" read_only image2d_t src_image, write_only image2d_t dst_image )\n"
|
||||
"{\n"
|
||||
" int i, j;\n"
|
||||
" int indx = 0;\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
|
||||
"\n"
|
||||
" for (i=-m/2; i<(m+1)/2; i++){\n"
|
||||
" for (j=-n/2; j<(n+1)/2; j++){\n"
|
||||
" float w = filter_weights[indx++];\n"
|
||||
"\n"
|
||||
" if (w != 0.0f){\n"
|
||||
" filter_result += w * read_imagef(src_image, sampler,\n"
|
||||
" (int2)(tid_x + j, tid_y + i));\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
//--- equivalent non-kernel code
|
||||
// Host-side stand-in for the kernel's read_imagef(): copies the nChannels
// bytes of one pixel of an interleaved w-by-h image into srcRgb as floats.
// Coordinates outside the image are clamped to the nearest edge pixel.
static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
{
    // clamp to edge: raise negatives to 0 first, then cap at the last column/row
    int col = ( x > 0 ) ? x : 0;
    int row = ( y > 0 ) ? y : 0;
    int base;
    int c = 0;

    if( col >= w - 1 )
        col = w - 1;
    if( row >= h - 1 )
        row = h - 1;

    // offset of the pixel's first channel
    base = ( row * w + col ) * nChannels;

    // widen every channel byte to float
    while( c < nChannels ){
        srcRgb[c] = (float)src[base + c];
        c++;
    }
} // end read_imagef()
|
||||
|
||||
|
||||
// Host-side stand-in for the kernel's write_imagef(): stores the nChannels
// float values of dstRgb into pixel (x, y) of an interleaved image that is
// w pixels wide. Each value is truncated toward zero to an 8-bit channel.
//
// Values are clamped to 0..255 before the conversion (NaN becomes 0):
// converting a float whose truncated value does not fit the destination
// integer type is undefined behaviour in C. In-range values are stored
// exactly as before. h is accepted for symmetry with read_imagef() and is
// not used.
static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
{
    // offset of the pixel's first channel
    int indx = ( y * w + x ) * nChannels;
    int i;

    (void)h;

    for( i = 0; i < nChannels; i++ ){
        float v = dstRgb[i];

        if( !( v > 0.0f ) )        // negative, zero or NaN
            dst[indx+i] = 0;
        else if( v >= 255.0f )     // saturate instead of overflowing
            dst[indx+i] = 255;
        else
            dst[indx+i] = (uchar)v;
    }
} // end write_imagef()
|
||||
|
||||
|
||||
// Computes one output pixel of the reference (host) filter.
//
// The n-wide by m-high window is centred on (x, y); filter_weights is read in
// row-major order, one weight per window position. Samples with a zero weight
// are skipped. The weighted per-channel sum is stored to dst at (x, y).
// The accumulators hold four channels.
static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
{
    float accum[4] = { 0.f, 0.f, 0.f, 0.f };
    float sample[4];
    const float *weight = filter_weights;
    const int rowBegin = -m / 2, rowEnd = ( m + 1 ) / 2;
    const int colBegin = -n / 2, colEnd = ( n + 1 ) / 2;
    int dy, dx, c;

    for( dy = rowBegin; dy < rowEnd; dy++ ){
        for( dx = colBegin; dx < colEnd; dx++ ){
            const float wgt = *weight++;

            // a zero weight contributes nothing, so skip the fetch
            if( wgt == 0 )
                continue;

            read_imagef( x + dx, y + dy, xsize, ysize, nChannels, src, sample );
            for( c = 0; c < nChannels; c++ )
                accum[c] += wgt * sample[c];
        }
    }

    write_imagef( dst, x, y, xsize, ysize, nChannels, accum );
} // end basicFilterPixel()
|
||||
|
||||
|
||||
//--- helper functions
|
||||
// Allocates a buffer of `elements` bytes and fills it with the low byte of
// successive genrand_int32(d) values. Returns NULL when the allocation
// fails; otherwise the caller owns the buffer and releases it with free().
static uchar *createImage( int elements, MTdata d)
{
    uchar *image = (uchar *)malloc( elements * sizeof( cl_uchar ) );
    int filled = 0;

    if( image != NULL ){
        while( filled < elements ){
            image[filled] = (uchar)genrand_int32(d);
            filled++;
        }
    }

    return image;
} // end createImage()
|
||||
|
||||
|
||||
// Compares two interleaved xsize-by-ysize images channel by channel.
// Returns 0 when every channel pair differs by at most `tolerance`;
// otherwise logs the first offending position and both values, and
// returns -1.
static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
{
    int offset = 0;   // running index into both buffers
    int row, col, chan;

    for( row = 0; row < ysize; row++ ){
        for( col = 0; col < xsize; col++ ){
            for( chan = 0; chan < nChannels; chan++, offset++ ){
                const int first = ptr0[offset];
                const int second = ptr1[offset];
                int delta = first - second;

                if( delta < 0 )
                    delta = -delta;

                if( (uchar)delta > tolerance ){
                    log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", col, row, chan,
                               first, second );
                    return -1;
                }
            }
        }
    }

    return 0;
} // end verifyImages()
|
||||
|
||||
|
||||
// Runs the image_filter kernel on the device and gathers its profiling data.
//
// Steps: create the source and destination w-by-h RGBA/UNORM_INT8 images and
// a buffer with the 3x3 filter weights, build the kernel, enqueue it over a
// w-by-h range, wait for it, query the QUEUED/SUBMIT/START/END timestamps of
// the kernel event, and read the destination image back into outptr.
//
// inptr   - host pixels copied into the source image.
// outptr  - receives the filtered image.
// nChannels is accepted for signature symmetry and is not used here.
//
// Returns 0 on success and -1 when any OpenCL call fails or when
// check_times() reports a non-zero result for the collected timestamps.
//
// Every handle starts out NULL and is released on a single cleanup path, so
// nothing is released before it has been created (in particular the event,
// which clEnqueueNDRangeKernel only returns on success).
static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
                         uchar *inptr, uchar *outptr )
{
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_mem memobjs[3] = { NULL, NULL, NULL };   // [0] source image, [1] destination image, [2] weights
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event executeEvent = NULL;
    cl_ulong queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t threads[2];
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3];
#ifdef USE_LOCAL_THREADS
    size_t localThreads[2];
#endif
    float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    int filter_w = 3, filter_h = 3;
    int err = 0;
    int result = -1;
    int i;

    // one work-item per pixel
    threads[0] = w;
    threads[1] = h;

#ifdef USE_LOCAL_THREADS
    // NOTE(review): `id`, clGetDeviceConfigInfo and CL_DEVICE_MAX_THREAD_GROUP_SIZE
    // are not declared anywhere in the visible code -- confirm this branch still
    // builds before defining USE_LOCAL_THREADS.
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
    test_error( err, "Unable to get thread group max size" );
    localThreads[1] = localThreads[0];
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];
#endif

    // allocate the input and output image memory objects
    memobjs[0] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR),
                                  &image_format_desc, w, h, 0, inptr, &err );
    if( memobjs[0] == NULL ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto cleanup;
    }

    memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
    if( memobjs[1] == NULL ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto cleanup;
    }

    // allocate an array memory object to load the filter weights
    memobjs[2] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR ),
                                 sizeof( cl_float ) * filter_w * filter_h, filter_weights, &err );
    if( memobjs[2] == NULL ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto cleanup;
    }

    // create the compute program
    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_filter_src, "image_filter" );
    if( err ){
        // What the helper leaves in program/kernel on failure is not visible
        // here, so (as before) neither is released on this path.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    // set the args values; the codes are OR-ed together, so the value printed
    // on failure only tells that at least one call failed
    err = clSetKernelArg( kernel, 0, sizeof( cl_int ), (void *)&filter_w );
    err |= clSetKernelArg( kernel, 1, sizeof( cl_int ), (void *)&filter_h );
    err |= clSetKernelArg( kernel, 2, sizeof( cl_mem ), (void *)&memobjs[2] );
    err |= clSetKernelArg( kernel, 3, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel, 4, sizeof( cl_mem ), (void *)&memobjs[1] );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto cleanup;
    }

    // empty wait list: count 0, list NULL
#ifdef USE_LOCAL_THREADS
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, localThreads, 0, NULL, &executeEvent );
#else
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &executeEvent );
#endif
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        executeEvent = NULL;   // no event exists when the enqueue fails
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &executeEvent );
    if( err != CL_SUCCESS )
        goto cleanup;

    // test profiling: poll while the runtime reports the value as not yet available
    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    // read output image (blocking)
    region[0] = (size_t)w;
    region[1] = (size_t)h;
    region[2] = 1;
    err = clEnqueueReadImage( queue, memobjs[1], CL_TRUE, origin, region, 0, 0, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage failed\n" );
        goto cleanup;
    }

    result = 0;

cleanup:
    // release event, kernel, program, and memory objects (whatever exists)
    if( executeEvent != NULL )
        clReleaseEvent( executeEvent );
    if( kernel != NULL )
        clReleaseKernel( kernel );
    if( program != NULL )
        clReleaseProgram( program );
    for( i = 2; i >= 0; i-- ){
        if( memobjs[i] != NULL )
            clReleaseMemObject( memobjs[i] );
    }

    // the timestamps are only meaningful when every step above succeeded
    if( result == 0 && check_times( queueStart, submitStart, writeStart, writeEnd, device ) )
        result = -1;

    return result;
} // end kernelFilter()
|
||||
|
||||
|
||||
// Host reference filter: applies the same 3x3 weights the kernel path uses
// to every pixel of the w-by-h image in inptr and stores the result in
// outptr. Always returns 0.
static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
{
    static const float kWeights[9] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    const int kWidth = 3, kHeight = 3;
    int row = 0;

    // row-major walk over the destination pixels
    while( row < h ){
        int col = 0;

        while( col < w ){
            basicFilterPixel( col, row, kWidth, kHeight, w, h, nChannels, kWeights, inptr, outptr );
            col++;
        }
        row++;
    }

    return 0;
} // end of basicFilter()
|
||||
|
||||
|
||||
// Profiling test entry point: filters a random 256x256 four-channel image
// once on the device (kernelFilter) and once on the host (basicFilter) and
// compares the two results with a per-channel tolerance of 1.
// Returns 0 on success; a failed allocation, a kernelFilter failure or an
// image mismatch yields a non-zero result. num_elements is not used.
int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    const int width = 256, height = 256;
    const int channels = 4;
    const int byteCount = width * height * channels;
    uchar *source = NULL;
    uchar *deviceResult = NULL;
    uchar *hostResult = NULL;
    int status = -1;
    MTdata rng;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // random input image; the generator is only needed while filling it
    rng = init_genrand( gRandomSeed );
    source = createImage( byteCount, rng );
    free_mtdata( rng );
    rng = NULL;

    if( source == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", byteCount );
        goto done;
    }

    deviceResult = (uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( deviceResult == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", byteCount );
        goto done;
    }

    hostResult = (uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( hostResult == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #2\n", byteCount );
        goto done;
    }

    status = kernelFilter( device, context, queue, width, height, channels, source, deviceResult );
    if( status == 0 ){
        basicFilter( width, height, channels, source, hostResult );

        // verify that the images are the same
        status = verifyImages( deviceResult, hostResult, (uchar)0x1, width, height, channels );
        if( status )
            log_error( " images do not match\n" );
    }

done:
    // clean up; free(NULL) is a no-op, so buffers never allocated are harmless
    free( hostResult );
    free( deviceResult );
    free( source );

    return status;
} // end execute()
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#ifndef uchar
|
||||
typedef unsigned char uchar;
|
||||
#endif
|
||||
|
||||
#undef MIN
|
||||
#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
|
||||
|
||||
#undef MAX
|
||||
#define MAX(x,y) ( (x) > (y) ? (x) : (y) )
|
||||
|
||||
//#define CREATE_OUTPUT 1
|
||||
|
||||
extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
|
||||
|
||||
|
||||
|
||||
//--- the code for kernel executables
|
||||
// OpenCL C source of the "image_filter" kernel, handed to the kernel builder
// at run time. Each work-item (tid_x, tid_y) walks an n-wide by m-high window
// centred on its own pixel, adds up weight * read_imagef() for every non-zero
// entry of filter_weights (consumed in row-major order via indx), and writes
// the sum to the same coordinate of dst_image. The sampler uses
// clamp-to-edge addressing with nearest filtering.
static const char *image_filter_src =
|
||||
"constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
|
||||
"\n"
|
||||
"__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
|
||||
" read_only image2d_t src_image, write_only image2d_t dst_image )\n"
|
||||
"{\n"
|
||||
" int i, j;\n"
|
||||
" int indx = 0;\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
|
||||
"\n"
|
||||
" for (i=-m/2; i<(m+1)/2; i++){\n"
|
||||
" for (j=-n/2; j<(n+1)/2; j++){\n"
|
||||
" float w = filter_weights[indx++];\n"
|
||||
"\n"
|
||||
" if (w != 0.0f){\n"
|
||||
" filter_result += w * read_imagef(src_image, sampler,\n"
|
||||
" (int2)(tid_x + j, tid_y + i));\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
//--- equivalent non-kernel code
|
||||
// Host-side stand-in for the kernel's read_imagef(): copies the nChannels
// bytes of one pixel of an interleaved w-by-h image into srcRgb as floats.
// Coordinates outside the image are clamped to the nearest edge pixel.
static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
{
    // clamp to edge: raise negatives to 0 first, then cap at the last column/row
    int col = ( x > 0 ) ? x : 0;
    int row = ( y > 0 ) ? y : 0;
    int base;
    int c = 0;

    if( col >= w - 1 )
        col = w - 1;
    if( row >= h - 1 )
        row = h - 1;

    // offset of the pixel's first channel
    base = ( row * w + col ) * nChannels;

    // widen every channel byte to float
    while( c < nChannels ){
        srcRgb[c] = (float)src[base + c];
        c++;
    }
} // end read_imagef()
|
||||
|
||||
|
||||
// Host-side stand-in for the kernel's write_imagef(): stores the nChannels
// float values of dstRgb into pixel (x, y) of an interleaved image that is
// w pixels wide. Each value is truncated toward zero to an 8-bit channel.
//
// Values are clamped to 0..255 before the conversion (NaN becomes 0):
// converting a float whose truncated value does not fit the destination
// integer type is undefined behaviour in C. In-range values are stored
// exactly as before. h is accepted for symmetry with read_imagef() and is
// not used.
static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
{
    // offset of the pixel's first channel
    int indx = ( y * w + x ) * nChannels;
    int i;

    (void)h;

    for( i = 0; i < nChannels; i++ ){
        float v = dstRgb[i];

        if( !( v > 0.0f ) )        // negative, zero or NaN
            dst[indx+i] = 0;
        else if( v >= 255.0f )     // saturate instead of overflowing
            dst[indx+i] = 255;
        else
            dst[indx+i] = (uchar)v;
    }
} // end write_imagef()
|
||||
|
||||
|
||||
// Computes one output pixel of the reference (host) filter.
//
// The n-wide by m-high window is centred on (x, y); filter_weights is read in
// row-major order, one weight per window position. Samples with a zero weight
// are skipped. The weighted per-channel sum is stored to dst at (x, y).
// The accumulators hold four channels.
static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
{
    float accum[4] = { 0.f, 0.f, 0.f, 0.f };
    float sample[4];
    const float *weight = filter_weights;
    const int rowBegin = -m / 2, rowEnd = ( m + 1 ) / 2;
    const int colBegin = -n / 2, colEnd = ( n + 1 ) / 2;
    int dy, dx, c;

    for( dy = rowBegin; dy < rowEnd; dy++ ){
        for( dx = colBegin; dx < colEnd; dx++ ){
            const float wgt = *weight++;

            // a zero weight contributes nothing, so skip the fetch
            if( wgt == 0 )
                continue;

            read_imagef( x + dx, y + dy, xsize, ysize, nChannels, src, sample );
            for( c = 0; c < nChannels; c++ )
                accum[c] += wgt * sample[c];
        }
    }

    write_imagef( dst, x, y, xsize, ysize, nChannels, accum );
} // end basicFilterPixel()
|
||||
|
||||
|
||||
//--- helper functions
|
||||
// Allocates a buffer of `elements` bytes and fills it with the low byte of
// successive genrand_int32(d) values. Returns NULL when the allocation
// fails; otherwise the caller owns the buffer and releases it with free().
static uchar *createImage( int elements, MTdata d)
{
    uchar *image = (uchar *)malloc( elements * sizeof( cl_uchar ) );
    int filled = 0;

    if( image != NULL ){
        while( filled < elements ){
            image[filled] = (uchar)genrand_int32(d);
            filled++;
        }
    }

    return image;
} // end createImage()
|
||||
|
||||
|
||||
// Compares two interleaved xsize-by-ysize images channel by channel.
// Returns 0 when every channel pair differs by at most `tolerance`;
// otherwise logs the first offending position and both values, and
// returns -1.
static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
{
    int offset = 0;   // running index into both buffers
    int row, col, chan;

    for( row = 0; row < ysize; row++ ){
        for( col = 0; col < xsize; col++ ){
            for( chan = 0; chan < nChannels; chan++, offset++ ){
                const int first = ptr0[offset];
                const int second = ptr1[offset];
                int delta = first - second;

                if( delta < 0 )
                    delta = -delta;

                if( (uchar)delta > tolerance ){
                    log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", col, row, chan,
                               first, second );
                    return -1;
                }
            }
        }
    }

    return 0;
} // end verifyImages()
|
||||
|
||||
|
||||
// Runs the image_filter kernel on the device and gathers its profiling data.
//
// Steps: create the source and destination w-by-h RGBA/UNORM_INT8 images and
// a buffer with the 3x3 filter weights, build the kernel, enqueue it over a
// w-by-h range, wait for it, query the QUEUED/SUBMIT/START/END timestamps of
// the kernel event, and read the destination image back into outptr.
//
// inptr   - host pixels copied into the source image.
// outptr  - receives the filtered image.
// nChannels is accepted for signature symmetry and is not used here.
//
// Returns 0 on success and -1 when any OpenCL call fails or when
// check_times() reports a non-zero result for the collected timestamps.
//
// Every handle starts out NULL and is released on a single cleanup path, so
// nothing is released before it has been created (in particular the event,
// which clEnqueueNDRangeKernel only returns on success).
static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
                         uchar *inptr, uchar *outptr )
{
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_mem memobjs[3] = { NULL, NULL, NULL };   // [0] source image, [1] destination image, [2] weights
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event executeEvent = NULL;
    cl_ulong queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t threads[2];
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3];
#ifdef USE_LOCAL_THREADS
    size_t localThreads[2];
#endif
    float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    int filter_w = 3, filter_h = 3;
    int err = 0;
    int result = -1;
    int i;

    // one work-item per pixel
    threads[0] = w;
    threads[1] = h;

#ifdef USE_LOCAL_THREADS
    // NOTE(review): `id`, clGetDeviceConfigInfo and CL_DEVICE_MAX_THREAD_GROUP_SIZE
    // are not declared anywhere in the visible code -- confirm this branch still
    // builds before defining USE_LOCAL_THREADS.
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
    test_error( err, "Unable to get thread group max size" );
    localThreads[1] = localThreads[0];
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];
#endif

    // allocate the input and output image memory objects
    memobjs[0] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR),
                                  &image_format_desc, w, h, 0, inptr, &err );
    if( memobjs[0] == NULL ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto cleanup;
    }

    memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
    if( memobjs[1] == NULL ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto cleanup;
    }

    // allocate an array memory object to load the filter weights
    memobjs[2] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR ),
                                 sizeof( cl_float ) * filter_w * filter_h, filter_weights, &err );
    if( memobjs[2] == NULL ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto cleanup;
    }

    // create the compute program
    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_filter_src, "image_filter" );
    if( err ){
        // What the helper leaves in program/kernel on failure is not visible
        // here, so (as before) neither is released on this path.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    // set the args values; the codes are OR-ed together, so the value printed
    // on failure only tells that at least one call failed
    err = clSetKernelArg( kernel, 0, sizeof( cl_int ), (void *)&filter_w );
    err |= clSetKernelArg( kernel, 1, sizeof( cl_int ), (void *)&filter_h );
    err |= clSetKernelArg( kernel, 2, sizeof( cl_mem ), (void *)&memobjs[2] );
    err |= clSetKernelArg( kernel, 3, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel, 4, sizeof( cl_mem ), (void *)&memobjs[1] );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto cleanup;
    }

    // empty wait list: count 0, list NULL
#ifdef USE_LOCAL_THREADS
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, localThreads, 0, NULL, &executeEvent );
#else
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &executeEvent );
#endif
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        executeEvent = NULL;   // no event exists when the enqueue fails
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &executeEvent );
    if( err != CL_SUCCESS )
        goto cleanup;

    // test profiling: poll while the runtime reports the value as not yet available
    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    // read output image (blocking)
    region[0] = (size_t)w;
    region[1] = (size_t)h;
    region[2] = 1;
    err = clEnqueueReadImage( queue, memobjs[1], CL_TRUE, origin, region, 0, 0, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage failed\n" );
        goto cleanup;
    }

    result = 0;

cleanup:
    // release event, kernel, program, and memory objects (whatever exists)
    if( executeEvent != NULL )
        clReleaseEvent( executeEvent );
    if( kernel != NULL )
        clReleaseKernel( kernel );
    if( program != NULL )
        clReleaseProgram( program );
    for( i = 2; i >= 0; i-- ){
        if( memobjs[i] != NULL )
            clReleaseMemObject( memobjs[i] );
    }

    // the timestamps are only meaningful when every step above succeeded
    if( result == 0 && check_times( queueStart, submitStart, writeStart, writeEnd, device ) )
        result = -1;

    return result;
} // end kernelFilter()
|
||||
|
||||
|
||||
/*
 * Host-side reference pass: calls basicFilterPixel() once for every (x, y)
 * coordinate of the w x h image, handing it a fixed table of nine weights
 * and a 3 x 3 filter extent. Always returns 0.
 */
static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
{
    const float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    const int kernelW = 3;
    const int kernelH = 3;
    int row = 0;

    // Rows outermost, columns innermost, one pixel per call.
    while( row < h ){
        int col;
        for( col = 0; col < w; ++col )
            basicFilterPixel( col, row, kernelW, kernelH, w, h, nChannels, filter_weights, inptr, outptr );
        ++row;
    }

    return 0;

}   // end of basicFilter()
|
||||
|
||||
|
||||
/*
 * "execute" profiling test: filters a random 256 x 256, 4-channel image with
 * kernelFilter() on the device and with basicFilter() on the host, then
 * compares the two results with verifyImages() using a tolerance of 1.
 *
 * Returns 0 on success, -1 on allocation failure, otherwise the non-zero
 * status of kernelFilter() or verifyImages(). num_elements is not used.
 */
int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    const int   width = 256, height = 256;
    const int   channels = 4;
    const int   byteCount = width * height * channels;
    uchar      *source;
    uchar      *result[2];     // [0] = device output, [1] = host reference
    int         status = 0;
    MTdata      rng;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Random input image; the generator is only needed while filling it.
    rng = init_genrand( gRandomSeed );
    source = createImage( byteCount, rng );
    free_mtdata( rng );
    rng = NULL;

    if( source == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", byteCount );
        return -1;
    }

    result[0] = (uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( result[0] == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", byteCount );
        free( (void *)source );
        return -1;
    }

    result[1] = (uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( result[1] == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #2\n", byteCount );
        free( (void *)result[0] );
        free( (void *)source );
        return -1;
    }

    status = kernelFilter( device, context, queue, width, height, channels, source, result[0] );

    if( status == 0 ){
        basicFilter( width, height, channels, source, result[1] );

        // verify that the images are the same
        status = verifyImages( result[0], result[1], (uchar)0x1, width, height, channels );
        if( status != 0 )
            log_error( " images do not match\n" );
    }

    // clean up
    free( (void *)result[1] );
    free( (void *)result[0] );
    free( (void *)source );

    return status;

}   // end execute()
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,314 +1,314 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
// OpenCL C source of the "read3d" kernel. Each work-item reads the texel at
// its 3D global id with read_imagef(), multiplies the four channels by 255
// and stores them as unsigned chars at dst[4 * linear_index .. + 3], where
// linear_index = (z * image_height + y) * image_width + x.
static const char *read3d_kernel_code =
"\n"
"__kernel void read3d(read_only image3d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n"
"{\n"
"    int    tid_x = get_global_id(0);\n"
"    int    tid_y = get_global_id(1);\n"
"    int    tid_z = get_global_id(2);\n"
"    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
"    float4 color;\n"
"\n"
"    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
"    indx *= 4;\n"
"    dst[indx+0] = (unsigned char)(color.x * 255.0f);\n"
"    dst[indx+1] = (unsigned char)(color.y * 255.0f);\n"
"    dst[indx+2] = (unsigned char)(color.z * 255.0f);\n"
"    dst[indx+3] = (unsigned char)(color.w * 255.0f);\n"
"\n"
"}\n";
|
||||
|
||||
|
||||
/*
 * Allocates `elements` bytes and fills each one with the low byte of a
 * genrand_int32(d) draw. Returns the buffer (to be released with free()),
 * or NULL when the allocation fails.
 */
static cl_uchar *createImage( int elements, MTdata d )
{
    cl_uchar *pixels = (cl_uchar *)malloc( elements * sizeof( cl_uchar ) );

    if( pixels != NULL ){
        cl_uchar *cursor = pixels;
        int       remaining;

        for( remaining = elements; remaining > 0; --remaining )
            *cursor++ = (cl_uchar)genrand_int32( d );
    }

    return pixels;

}   // end createImage()
|
||||
|
||||
|
||||
/*
 * Compares two tightly packed images of xsize * ysize * zsize pixels with
 * nChannels bytes per pixel. Returns 0 when every byte pair differs by at
 * most `tolerance`; otherwise logs the first offending position and both
 * values and returns -1.
 */
static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int xsize, int ysize, int zsize, int nChannels )
{
    const cl_uchar *lhs = ptr0;
    const cl_uchar *rhs = ptr1;
    int             x, y, z, c;

    for( z = 0; z < zsize; ++z )
        for( y = 0; y < ysize; ++y )
            for( x = 0; x < xsize; ++x )
                for( c = 0; c < nChannels; ++c ){
                    const int       first  = *lhs++;
                    const int       second = *rhs++;
                    const cl_uchar  delta  = (cl_uchar)abs( first - second );

                    if( delta <= tolerance )
                        continue;

                    log_error( "  images differ at x,y,z = %d,%d,%d channel = %d, %d to %d\n",
                               x, y, z, c, first, second );
                    return -1;
                }

    return 0;

}   // end verifyImages()
|
||||
|
||||
|
||||
/*
 * Builds and runs the read3d kernel on a w x h x d CL_RGBA / CL_UNORM_INT8
 * 3D image initialised from inptr, queries the four profiling timestamps of
 * the kernel event, and reads the kernel's byte output back into outptr.
 *
 * outptr must be able to hold w * h * d * nChannels bytes (the kernel writes
 * four bytes per texel, so nChannels is expected to be 4).
 *
 * Returns 0 on success and -1 on any failure. Every OpenCL object created
 * here is released on every path.
 */
static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue,
                       int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr )
{
    // All locals are declared up front so the cleanup gotos never jump over
    // an initialisation (these sources are also compiled as C++).
    cl_program      program[1] = { NULL };
    cl_kernel       kernel[1] = { NULL };
    cl_mem          memobjs[2] = { NULL, NULL };
    cl_sampler      sampler = NULL;
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event        executeEvent = NULL;
    cl_ulong        queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t          threads[3];
    size_t          localThreads[3];
    size_t          maxItemSizes[3] = { 0, 0, 0 };
    size_t          maxGroupSize = 0;
    size_t          outputBytes = (size_t)w * h * d * nChannels * sizeof( cl_uchar );
    int             err = 0;
    int             result = -1;

    // set thread dimensions
    threads[0] = w;
    threads[1] = h;
    threads[2] = d;

    // Work-group size: 256 x 1 x 1 by default, narrowed to what the device
    // reports. CL_DEVICE_MAX_WORK_ITEM_SIZES returns one size_t per
    // dimension, so the whole array is passed. A failed query just keeps
    // the default.
    localThreads[0] = 256; localThreads[1] = 1; localThreads[2] = 1;
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( maxItemSizes ), maxItemSizes, NULL );
    if( err == CL_SUCCESS )
        err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( maxGroupSize ), &maxGroupSize, NULL );
    if( err == CL_SUCCESS ){
        if( maxItemSizes[0] < localThreads[0] )
            localThreads[0] = maxItemSizes[0];
        if( maxGroupSize < localThreads[0] )
            localThreads[0] = maxGroupSize;
    }
    err = 0;
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[0] == 0 )
        localThreads[0] = 1;
    // The global size has to be a multiple of the local size.
    while( threads[0] % localThreads[0] != 0 )
        localThreads[0]--;

    sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err );
    if( err ){
        log_error( " clCreateSampler failed.\n" );
        goto cleanup;
    }

    // allocate the input image memory object
    memobjs[0] = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), &image_format_desc, w, h, d, 0, 0, inptr, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error( " unable to create 3D image using create_image_3d\n" );
        goto cleanup;
    }

    // allocate the buffer the kernel writes its byte output to
    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_WRITE ), outputBytes, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto cleanup;
    }

    // create the compute program
    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &read3d_kernel_code, "read3d" );
    if( err ){
        // NOTE(review): what the helper leaves in program/kernel on failure
        // is not visible here; as before, nothing it produced is released.
        program[0] = NULL;
        kernel[0] = NULL;
        goto cleanup;
    }

    // set the args values
    err |= clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] );
    err |= clSetKernelArg( kernel[0], 2, sizeof sampler, &sampler );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto cleanup;
    }

    err = clEnqueueNDRangeKernel( queue, kernel[0], 3, NULL, threads, localThreads, NULL, 0, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        goto cleanup;
    }

    if (executeEvent) {

        // This synchronization point is needed in order to assume the data is valid.
        // Getting profiling information is not a synchronization point.
        err = clWaitForEvents( 1, &executeEvent );
        if( err != CL_SUCCESS ){
            print_error( err, "clWaitForEvents failed\n" );
            goto cleanup;
        }

        // test profiling
        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        // Profiling counters are in nanoseconds.
        log_info( "Profiling info:\n" );
        log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) * 1e-9 );
        log_info( "Time from start of clEnqueueNDRangeKernel to end: %f seconds\n", (double)(writeEnd - writeStart) * 1e-9 );
    }

    // Read the kernel output. Only outputBytes bytes are transferred: that is
    // what the kernel wrote and what the caller's buffer can hold.
    err = clEnqueueReadBuffer( queue, memobjs[1], CL_TRUE, 0, outputBytes, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed\n" );
        goto cleanup;
    }

    result = 0;

cleanup:
    // release event, kernel, program, memory objects and sampler
    if( executeEvent )
        clReleaseEvent( executeEvent );
    if( kernel[0] )
        clReleaseKernel( kernel[0] );
    if( program[0] )
        clReleaseProgram( program[0] );
    if( memobjs[1] )
        clReleaseMemObject( memobjs[1] );
    if( memobjs[0] )
        clReleaseMemObject( memobjs[0] );
    if( sampler )
        clReleaseSampler( sampler );

    return result;

}   // end run_kernel()
|
||||
|
||||
|
||||
// The main point of this test is to exercise code that causes a multipass cld launch for a single
|
||||
// kernel exec at the cl level. This is done on the gpu for 3d launches, and it's also done
|
||||
// to handle gdims that exceed the maximums allowed by the hardware. In this case we
|
||||
// use 3d to exercise the multipass events. In the future 3d may not be multipass, in which
|
||||
// case we will need to ensure that we use gdims large enough to force multipass.
|
||||
|
||||
/*
 * "execute_multipass" profiling test: copies a random 256 x 128 x 32,
 * 4-channel image through the read3d kernel via run_kernel() and checks with
 * verifyImages() that the bytes read back match the input within a
 * tolerance of 1.
 *
 * Returns 0 on success, -1 on allocation failure, otherwise the non-zero
 * status of run_kernel() or verifyImages(). num_elements is not used.
 */
int execute_multipass( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    const int   width = 256, height = 128, depth = 32;
    const int   channels = 4;
    const int   byteCount = width * height * depth * channels;
    cl_uchar   *source;
    cl_uchar   *readback;
    int         status = 0;
    MTdata      rng;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Random input image; the generator is only needed while filling it.
    rng = init_genrand( gRandomSeed );
    source = createImage( byteCount, rng );
    free_mtdata( rng );
    rng = NULL;

    if( source == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", byteCount );
        return -1;
    }

    readback = (cl_uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( readback == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", byteCount );
        free( (void *)source );
        return -1;
    }

    status = run_kernel( device, context, queue, width, height, depth, channels, source, readback );

    if( status == 0 ){
        // verify that the images are the same
        status = verifyImages( readback, source, (cl_uchar)0x1, width, height, depth, channels );
        if( status != 0 )
            log_error( " images do not match\n" );
    }

    // clean up
    free( (void *)readback );
    free( (void *)source );

    return status;

}   // end execute_multipass()
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
// OpenCL C source of the "read3d" kernel. Each work-item reads the texel at
// its 3D global id with read_imagef(), multiplies the four channels by 255
// and stores them as unsigned chars at dst[4 * linear_index .. + 3], where
// linear_index = (z * image_height + y) * image_width + x.
static const char *read3d_kernel_code =
"\n"
"__kernel void read3d(read_only image3d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n"
"{\n"
"    int    tid_x = get_global_id(0);\n"
"    int    tid_y = get_global_id(1);\n"
"    int    tid_z = get_global_id(2);\n"
"    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
"    float4 color;\n"
"\n"
"    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
"    indx *= 4;\n"
"    dst[indx+0] = (unsigned char)(color.x * 255.0f);\n"
"    dst[indx+1] = (unsigned char)(color.y * 255.0f);\n"
"    dst[indx+2] = (unsigned char)(color.z * 255.0f);\n"
"    dst[indx+3] = (unsigned char)(color.w * 255.0f);\n"
"\n"
"}\n";
|
||||
|
||||
|
||||
/*
 * Allocates `elements` bytes and fills each one with the low byte of a
 * genrand_int32(d) draw. Returns the buffer (to be released with free()),
 * or NULL when the allocation fails.
 */
static cl_uchar *createImage( int elements, MTdata d )
{
    cl_uchar *pixels = (cl_uchar *)malloc( elements * sizeof( cl_uchar ) );

    if( pixels != NULL ){
        cl_uchar *cursor = pixels;
        int       remaining;

        for( remaining = elements; remaining > 0; --remaining )
            *cursor++ = (cl_uchar)genrand_int32( d );
    }

    return pixels;

}   // end createImage()
|
||||
|
||||
|
||||
/*
 * Compares two tightly packed images of xsize * ysize * zsize pixels with
 * nChannels bytes per pixel. Returns 0 when every byte pair differs by at
 * most `tolerance`; otherwise logs the first offending position and both
 * values and returns -1.
 */
static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int xsize, int ysize, int zsize, int nChannels )
{
    const cl_uchar *lhs = ptr0;
    const cl_uchar *rhs = ptr1;
    int             x, y, z, c;

    for( z = 0; z < zsize; ++z )
        for( y = 0; y < ysize; ++y )
            for( x = 0; x < xsize; ++x )
                for( c = 0; c < nChannels; ++c ){
                    const int       first  = *lhs++;
                    const int       second = *rhs++;
                    const cl_uchar  delta  = (cl_uchar)abs( first - second );

                    if( delta <= tolerance )
                        continue;

                    log_error( "  images differ at x,y,z = %d,%d,%d channel = %d, %d to %d\n",
                               x, y, z, c, first, second );
                    return -1;
                }

    return 0;

}   // end verifyImages()
|
||||
|
||||
|
||||
/*
 * Builds and runs the read3d kernel on a w x h x d CL_RGBA / CL_UNORM_INT8
 * 3D image initialised from inptr, queries the four profiling timestamps of
 * the kernel event, and reads the kernel's byte output back into outptr.
 *
 * outptr must be able to hold w * h * d * nChannels bytes (the kernel writes
 * four bytes per texel, so nChannels is expected to be 4).
 *
 * Returns 0 on success and -1 on any failure. Every OpenCL object created
 * here is released on every path.
 */
static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue,
                       int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr )
{
    // All locals are declared up front so the cleanup gotos never jump over
    // an initialisation (these sources are also compiled as C++).
    cl_program      program[1] = { NULL };
    cl_kernel       kernel[1] = { NULL };
    cl_mem          memobjs[2] = { NULL, NULL };
    cl_sampler      sampler = NULL;
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event        executeEvent = NULL;
    cl_ulong        queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t          threads[3];
    size_t          localThreads[3];
    size_t          maxItemSizes[3] = { 0, 0, 0 };
    size_t          maxGroupSize = 0;
    size_t          outputBytes = (size_t)w * h * d * nChannels * sizeof( cl_uchar );
    int             err = 0;
    int             result = -1;

    // set thread dimensions
    threads[0] = w;
    threads[1] = h;
    threads[2] = d;

    // Work-group size: 256 x 1 x 1 by default, narrowed to what the device
    // reports. CL_DEVICE_MAX_WORK_ITEM_SIZES returns one size_t per
    // dimension, so the whole array is passed. A failed query just keeps
    // the default.
    localThreads[0] = 256; localThreads[1] = 1; localThreads[2] = 1;
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( maxItemSizes ), maxItemSizes, NULL );
    if( err == CL_SUCCESS )
        err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( maxGroupSize ), &maxGroupSize, NULL );
    if( err == CL_SUCCESS ){
        if( maxItemSizes[0] < localThreads[0] )
            localThreads[0] = maxItemSizes[0];
        if( maxGroupSize < localThreads[0] )
            localThreads[0] = maxGroupSize;
    }
    err = 0;
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[0] == 0 )
        localThreads[0] = 1;
    // The global size has to be a multiple of the local size.
    while( threads[0] % localThreads[0] != 0 )
        localThreads[0]--;

    sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err );
    if( err ){
        log_error( " clCreateSampler failed.\n" );
        goto cleanup;
    }

    // allocate the input image memory object
    memobjs[0] = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), &image_format_desc, w, h, d, 0, 0, inptr, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error( " unable to create 3D image using create_image_3d\n" );
        goto cleanup;
    }

    // allocate the buffer the kernel writes its byte output to
    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_WRITE ), outputBytes, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto cleanup;
    }

    // create the compute program
    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &read3d_kernel_code, "read3d" );
    if( err ){
        // NOTE(review): what the helper leaves in program/kernel on failure
        // is not visible here; as before, nothing it produced is released.
        program[0] = NULL;
        kernel[0] = NULL;
        goto cleanup;
    }

    // set the args values
    err |= clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] );
    err |= clSetKernelArg( kernel[0], 2, sizeof sampler, &sampler );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto cleanup;
    }

    err = clEnqueueNDRangeKernel( queue, kernel[0], 3, NULL, threads, localThreads, NULL, 0, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        goto cleanup;
    }

    if (executeEvent) {

        // This synchronization point is needed in order to assume the data is valid.
        // Getting profiling information is not a synchronization point.
        err = clWaitForEvents( 1, &executeEvent );
        if( err != CL_SUCCESS ){
            print_error( err, "clWaitForEvents failed\n" );
            goto cleanup;
        }

        // test profiling
        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        // Profiling counters are in nanoseconds.
        log_info( "Profiling info:\n" );
        log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) * 1e-9 );
        log_info( "Time from start of clEnqueueNDRangeKernel to end: %f seconds\n", (double)(writeEnd - writeStart) * 1e-9 );
    }

    // Read the kernel output. Only outputBytes bytes are transferred: that is
    // what the kernel wrote and what the caller's buffer can hold.
    err = clEnqueueReadBuffer( queue, memobjs[1], CL_TRUE, 0, outputBytes, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed\n" );
        goto cleanup;
    }

    result = 0;

cleanup:
    // release event, kernel, program, memory objects and sampler
    if( executeEvent )
        clReleaseEvent( executeEvent );
    if( kernel[0] )
        clReleaseKernel( kernel[0] );
    if( program[0] )
        clReleaseProgram( program[0] );
    if( memobjs[1] )
        clReleaseMemObject( memobjs[1] );
    if( memobjs[0] )
        clReleaseMemObject( memobjs[0] );
    if( sampler )
        clReleaseSampler( sampler );

    return result;

}   // end run_kernel()
|
||||
|
||||
|
||||
// The main point of this test is to exercise code that causes a multipass cld launch for a single
|
||||
// kernel exec at the cl level. This is done on the gpu for 3d launches, and it's also done
|
||||
// to handle gdims that exceed the maximums allowed by the hardware. In this case we
|
||||
// use 3d to exercise the multipass events. In the future 3d may not be multipass, in which
|
||||
// case we will need to ensure that we use gdims large enough to force multipass.
|
||||
|
||||
/*
 * "execute_multipass" profiling test: copies a random 256 x 128 x 32,
 * 4-channel image through the read3d kernel via run_kernel() and checks with
 * verifyImages() that the bytes read back match the input within a
 * tolerance of 1.
 *
 * Returns 0 on success, -1 on allocation failure, otherwise the non-zero
 * status of run_kernel() or verifyImages(). num_elements is not used.
 */
int execute_multipass( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    const int   width = 256, height = 128, depth = 32;
    const int   channels = 4;
    const int   byteCount = width * height * depth * channels;
    cl_uchar   *source;
    cl_uchar   *readback;
    int         status = 0;
    MTdata      rng;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Random input image; the generator is only needed while filling it.
    rng = init_genrand( gRandomSeed );
    source = createImage( byteCount, rng );
    free_mtdata( rng );
    rng = NULL;

    if( source == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", byteCount );
        return -1;
    }

    readback = (cl_uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( readback == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", byteCount );
        free( (void *)source );
        return -1;
    }

    status = run_kernel( device, context, queue, width, height, depth, channels, source, readback );

    if( status == 0 ){
        // verify that the images are the same
        status = verifyImages( readback, source, (cl_uchar)0x1, width, height, depth, channels );
        if( status != 0 )
            log_error( " images do not match\n" );
    }

    // clean up
    free( (void *)readback );
    free( (void *)source );

    return status;

}   // end execute_multipass()
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,173 +1,173 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
basefn basefn_list[] = {
|
||||
read_int_array,
|
||||
read_uint_array,
|
||||
read_long_array,
|
||||
read_ulong_array,
|
||||
read_short_array,
|
||||
read_ushort_array,
|
||||
read_float_array,
|
||||
read_char_array,
|
||||
read_uchar_array,
|
||||
read_struct_array,
|
||||
write_int_array,
|
||||
write_uint_array,
|
||||
write_long_array,
|
||||
write_ulong_array,
|
||||
write_short_array,
|
||||
write_ushort_array,
|
||||
write_float_array,
|
||||
write_char_array,
|
||||
write_uchar_array,
|
||||
write_struct_array,
|
||||
read_float_image,
|
||||
read_char_image,
|
||||
read_uchar_image,
|
||||
write_float_image,
|
||||
write_char_image,
|
||||
write_uchar_image,
|
||||
copy_array,
|
||||
copy_partial_array,
|
||||
copy_image,
|
||||
copy_array_to_image,
|
||||
execute
|
||||
};
|
||||
|
||||
|
||||
// One name per entry of basefn_list[], followed by a trailing "all"; the
// ct_assert after this table checks that the two counts agree.
const char *basefn_names[] = {
    // *_array reads
    "read_array_int",     "read_array_uint",    "read_array_long",    "read_array_ulong",
    "read_array_short",   "read_array_ushort",  "read_array_float",   "read_array_char",
    "read_array_uchar",   "read_array_struct",

    // *_array writes
    "write_array_int",    "write_array_uint",   "write_array_long",   "write_array_ulong",
    "write_array_short",  "write_array_ushort", "write_array_float",  "write_array_char",
    "write_array_uchar",  "write_array_struct",

    // image reads and writes
    "read_image_float",   "read_image_int",     "read_image_uint",
    "write_image_float",  "write_image_char",   "write_image_uchar",

    // copies
    "copy_array",         "copy_partial_array", "copy_image",         "copy_array_to_image",

    // kernel execution
    "execute",

    "all"
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_streamfns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
// FIXME: use timer resolution rather than hardcoding 1 ns per tick (check_times scales tick deltas by 1e-9).
|
||||
|
||||
#define QUEUE_SECONDS_LIMIT 30
|
||||
#define SUBMIT_SECONDS_LIMIT 30
|
||||
#define COMMAND_SECONDS_LIMIT 30
|
||||
/*
 * Sanity-checks the four profiling timestamps of one command.
 *
 * Logs the raw values together with CL_DEVICE_PROFILING_TIMER_RESOLUTION and
 * the three intervals, then fails when:
 *   - the timestamps are not ordered QUEUED <= SUBMIT <= START <= END,
 *   - QUEUED, START and END are all zero, or
 *   - an interval (ticks treated as nanoseconds) exceeds its *_SECONDS_LIMIT.
 *
 * Returns 0 when every check passes and -1 otherwise. A failing
 * clGetDeviceInfo is handled by the test_error macro.
 */
int check_times(cl_ulong queueStart, cl_ulong commandSubmit, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device) {
    int err = 0;

    size_t profiling_resolution = 0;
    err = clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(profiling_resolution), &profiling_resolution, NULL);
    test_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILING_TIMER_RESOLUTION failed.\n");

    // Cast every value to the exact type its conversion specifier expects:
    // cl_ulong and size_t are not unsigned long long / long on every platform.
    log_info("CL_PROFILING_COMMAND_QUEUED: %llu CL_PROFILING_COMMAND_SUBMIT: %llu CL_PROFILING_COMMAND_START: %llu CL_PROFILING_COMMAND_END: %llu CL_DEVICE_PROFILING_TIMER_RESOLUTION: %llu\n",
             (unsigned long long)queueStart, (unsigned long long)commandSubmit,
             (unsigned long long)commandStart, (unsigned long long)commandEnd,
             (unsigned long long)profiling_resolution);

    // Unsigned differences: a mis-ordered pair wraps to a huge value, which
    // the limit checks below then report in addition to the ordering error.
    double queueTosubmitTimeS = (double)(commandSubmit - queueStart)*1e-9;
    double submitToStartTimeS = (double)(commandStart - commandSubmit)*1e-9;
    double startToEndTimeS = (double)(commandEnd - commandStart)*1e-9;

    log_info( "Profiling info:\n" );
    log_info( "Time from queue to submit : %fms\n", (double)(queueTosubmitTimeS) * 1000.f );
    log_info( "Time from submit to start : %fms\n", (double)(submitToStartTimeS) * 1000.f );
    log_info( "Time from start to end: %fms\n", (double)(startToEndTimeS) * 1000.f );

    if(queueStart > commandSubmit) {
        log_error("CL_PROFILING_COMMAND_QUEUED > CL_PROFILING_COMMAND_SUBMIT.\n");
        err = -1;
    }

    if (commandSubmit > commandStart) {
        log_error("CL_PROFILING_COMMAND_SUBMIT > CL_PROFILING_COMMAND_START.\n");
        err = -1;
    }

    if (commandStart > commandEnd) {
        log_error("CL_PROFILING_COMMAND_START > CL_PROFILING_COMMAND_END.\n");
        err = -1;
    }

    if (queueStart == 0 && commandStart == 0 && commandEnd == 0) {
        log_error("All values are 0. This is exceedingly unlikely.\n");
        err = -1;
    }

    if (queueTosubmitTimeS > QUEUE_SECONDS_LIMIT) {
        log_error("Time between queue and submit is too big: %fs, test limit: %fs.\n",
                  queueTosubmitTimeS , (double)QUEUE_SECONDS_LIMIT);
        err = -1;
    }

    if (submitToStartTimeS > SUBMIT_SECONDS_LIMIT) {
        log_error("Time between submit and start is too big: %fs, test limit: %fs.\n",
                  submitToStartTimeS , (double)SUBMIT_SECONDS_LIMIT);
        err = -1;
    }

    if (startToEndTimeS > COMMAND_SECONDS_LIMIT) {
        log_error("Time between start and end is too big: %fs, test limit: %fs.\n",
                  startToEndTimeS , (double)COMMAND_SECONDS_LIMIT);
        err = -1;
    }
    return err;
}
|
||||
|
||||
|
||||
int main( int argc, const char *argv[] )
|
||||
{
|
||||
return runTestHarness( argc, argv, num_streamfns, basefn_list, basefn_names,
|
||||
false, false, CL_QUEUE_PROFILING_ENABLE );
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
basefn basefn_list[] = {
|
||||
read_int_array,
|
||||
read_uint_array,
|
||||
read_long_array,
|
||||
read_ulong_array,
|
||||
read_short_array,
|
||||
read_ushort_array,
|
||||
read_float_array,
|
||||
read_char_array,
|
||||
read_uchar_array,
|
||||
read_struct_array,
|
||||
write_int_array,
|
||||
write_uint_array,
|
||||
write_long_array,
|
||||
write_ulong_array,
|
||||
write_short_array,
|
||||
write_ushort_array,
|
||||
write_float_array,
|
||||
write_char_array,
|
||||
write_uchar_array,
|
||||
write_struct_array,
|
||||
read_float_image,
|
||||
read_char_image,
|
||||
read_uchar_image,
|
||||
write_float_image,
|
||||
write_char_image,
|
||||
write_uchar_image,
|
||||
copy_array,
|
||||
copy_partial_array,
|
||||
copy_image,
|
||||
copy_array_to_image,
|
||||
execute
|
||||
};
|
||||
|
||||
|
||||
// Command-line names of the tests, index-aligned with basefn_list. The final
// "all" entry has no counterpart in basefn_list.
const char *basefn_names[] = {
    // buffer reads
    "read_array_int",     "read_array_uint",
    "read_array_long",    "read_array_ulong",
    "read_array_short",   "read_array_ushort",
    "read_array_float",
    "read_array_char",    "read_array_uchar",
    "read_array_struct",

    // buffer writes
    "write_array_int",    "write_array_uint",
    "write_array_long",   "write_array_ulong",
    "write_array_short",  "write_array_ushort",
    "write_array_float",
    "write_array_char",   "write_array_uchar",
    "write_array_struct",

    // image reads
    "read_image_float",   "read_image_int",     "read_image_uint",

    // image writes
    "write_image_float",  "write_image_char",   "write_image_uchar",

    // copies
    "copy_array",         "copy_partial_array",
    "copy_image",         "copy_array_to_image",

    // kernel execution
    "execute",

    // pseudo-entry without a matching function
    "all"
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_streamfns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
// FIXME: use timer resolution rather than hardcoding 1µs per tick.
|
||||
|
||||
#define QUEUE_SECONDS_LIMIT 30
|
||||
#define SUBMIT_SECONDS_LIMIT 30
|
||||
#define COMMAND_SECONDS_LIMIT 30
|
||||
int check_times(cl_ulong queueStart, cl_ulong commandSubmit, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device) {
|
||||
int err = 0;
|
||||
|
||||
size_t profiling_resolution = 0;
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(profiling_resolution), &profiling_resolution, NULL);
|
||||
test_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILING_TIMER_RESOLUTION failed.\n");
|
||||
|
||||
log_info("CL_PROFILING_COMMAND_QUEUED: %llu CL_PROFILING_COMMAND_SUBMIT: %llu CL_PROFILING_COMMAND_START: %llu CL_PROFILING_COMMAND_END: %llu CL_DEVICE_PROFILING_TIMER_RESOLUTION: %ld\n",
|
||||
queueStart, commandSubmit, commandStart, commandEnd, profiling_resolution);
|
||||
|
||||
double queueTosubmitTimeS = (double)(commandSubmit - queueStart)*1e-9;
|
||||
double submitToStartTimeS = (double)(commandStart - commandSubmit)*1e-9;
|
||||
double startToEndTimeS = (double)(commandEnd - commandStart)*1e-9;
|
||||
|
||||
log_info( "Profiling info:\n" );
|
||||
log_info( "Time from queue to submit : %fms\n", (double)(queueTosubmitTimeS) * 1000.f );
|
||||
log_info( "Time from submit to start : %fms\n", (double)(submitToStartTimeS) * 1000.f );
|
||||
log_info( "Time from start to end: %fms\n", (double)(startToEndTimeS) * 1000.f );
|
||||
|
||||
if(queueStart > commandSubmit) {
|
||||
log_error("CL_PROFILING_COMMAND_QUEUED > CL_PROFILING_COMMAND_SUBMIT.\n");
|
||||
err = -1;
|
||||
}
|
||||
|
||||
if (commandSubmit > commandStart) {
|
||||
log_error("CL_PROFILING_COMMAND_SUBMIT > CL_PROFILING_COMMAND_START.\n");
|
||||
err = -1;
|
||||
}
|
||||
|
||||
if (commandStart > commandEnd) {
|
||||
log_error("CL_PROFILING_COMMAND_START > CL_PROFILING_COMMAND_END.\n");
|
||||
err = -1;
|
||||
}
|
||||
|
||||
if (queueStart == 0 && commandStart == 0 && commandEnd == 0) {
|
||||
log_error("All values are 0. This is exceedingly unlikely.\n");
|
||||
err = -1;
|
||||
}
|
||||
|
||||
if (queueTosubmitTimeS > QUEUE_SECONDS_LIMIT) {
|
||||
log_error("Time between queue and submit is too big: %fs, test limit: %fs.\n",
|
||||
queueTosubmitTimeS , (double)QUEUE_SECONDS_LIMIT);
|
||||
err = -1;
|
||||
}
|
||||
|
||||
if (submitToStartTimeS > SUBMIT_SECONDS_LIMIT) {
|
||||
log_error("Time between submit and start is too big: %fs, test limit: %fs.\n",
|
||||
submitToStartTimeS , (double)QUEUE_SECONDS_LIMIT);
|
||||
err = -1;
|
||||
}
|
||||
|
||||
if (startToEndTimeS > COMMAND_SECONDS_LIMIT) {
|
||||
log_error("Time between queue and start is too big: %fs, test limit: %fs.\n",
|
||||
startToEndTimeS , (double)QUEUE_SECONDS_LIMIT);
|
||||
err = -1;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int main( int argc, const char *argv[] )
|
||||
{
|
||||
return runTestHarness( argc, argv, num_streamfns, basefn_list, basefn_names,
|
||||
false, false, CL_QUEUE_PROFILING_ENABLE );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,65 +1,65 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __PROCS_H__
|
||||
#define __PROCS_H__
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
|
||||
extern int check_times(cl_ulong queueStart, cl_ulong submitStart, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device);
|
||||
|
||||
extern int read_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_partial_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_parallel_kernels( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
|
||||
#endif // #ifndef __PROCS_H__
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __PROCS_H__
|
||||
#define __PROCS_H__
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
|
||||
extern int check_times(cl_ulong queueStart, cl_ulong submitStart, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device);
|
||||
|
||||
extern int read_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_partial_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_parallel_kernels( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
|
||||
#endif // #ifndef __PROCS_H__
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,391 +1,391 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
//--- the code for the kernel executables
|
||||
// OpenCL C sources of the kernels that populate the image before it is read
// back. Each work-item takes the four consecutive bytes of `src` starting at
// (y * image_width + x) * 4 and writes them as the pixel at (x, y).

// Float variant: the four bytes are converted to float and divided by 255
// before being written with write_imagef.
static const char testWritefSource[] =
    "__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
    " color /= (float4)(255.f, 255.f, 255.f, 255.f);\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";

// Signed-integer variant: bytes are read as char and written with write_imagei.
static const char testWriteiSource[] =
    "__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " int4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color.x = (int)src[indx+0];\n"
    " color.y = (int)src[indx+1];\n"
    " color.z = (int)src[indx+2];\n"
    " color.w = (int)src[indx+3];\n"
    " write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";

// Unsigned-integer variant: bytes are read as uchar and written with write_imageui.
static const char testWriteuiSource[] =
    "__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " uint4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color.x = (uint)src[indx+0];\n"
    " color.y = (uint)src[indx+1];\n"
    " color.z = (uint)src[indx+2];\n"
    " color.w = (uint)src[indx+3];\n"
    " write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";

// Kernel sources and their entry-point names, index-aligned:
// [0] float, [1] signed int, [2] unsigned int.
static const char *readKernelCode[] = { testWritefSource, testWriteiSource, testWriteuiSource };

static const char *readKernelName[] = { "testWritef", "testWritei", "testWriteui" };
|
||||
|
||||
|
||||
//--- helper functions
|
||||
// Allocates n bytes and fills each with the low 8 bits of successive
// genrand_int32( d ) values.
//
// Returns the buffer, which the caller must free(), or NULL if the allocation
// fails. The NULL is returned before any write, so callers can test the result.
static cl_uchar *generateImage( int n, MTdata d )
{
    // The malloc result is cast because this file is built as C++ (-xc++).
    cl_uchar *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) );
    int i;

    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (cl_uchar)genrand_int32( d );
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Signed counterpart of generateImage: allocates n chars and fills each with
// the truncated value of successive genrand_int32( d ) calls.
//
// Returns the buffer, which the caller must free(), or NULL if the allocation
// fails. The NULL is returned before any write, so callers can test the result.
static char *generateSignedImage( int n, MTdata d )
{
    // The malloc result is cast because this file is built as C++ (-xc++).
    char *ptr = (char *)malloc( n * sizeof( char ) );
    int i;

    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (char)genrand_int32( d );
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Compares the first w * h * 4 bytes of `image` and `outptr`.
// Returns 0 when they are all equal, -1 at the first mismatch.
static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h )
{
    const int byteCount = w * h * 4;
    int pos = 0;

    while( pos < byteCount )
    {
        if( image[pos] != outptr[pos] )
            return -1;
        ++pos;
    }

    return 0;
}
|
||||
|
||||
|
||||
//----- the test functions
|
||||
int read_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements, const char *code, const char *name,
|
||||
cl_image_format image_format_desc )
|
||||
{
|
||||
cl_mem memobjs[2];
|
||||
cl_program program[1];
|
||||
void *inptr;
|
||||
void *dst = NULL;
|
||||
cl_kernel kernel[1];
|
||||
cl_event readEvent;
|
||||
cl_ulong queueStart, submitStart, readStart, readEnd;
|
||||
size_t threads[2];
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
size_t localThreads[2];
|
||||
#endif
|
||||
int err;
|
||||
int w = 64, h = 64;
|
||||
cl_mem_flags flags;
|
||||
size_t element_nbytes;
|
||||
size_t num_bytes;
|
||||
size_t channel_nbytes = sizeof( cl_uchar );
|
||||
MTdata d;
|
||||
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
element_nbytes = channel_nbytes * get_format_channel_count( &image_format_desc );
|
||||
num_bytes = w * h * element_nbytes;
|
||||
|
||||
threads[0] = (size_t)w;
|
||||
threads[1] = (size_t)h;
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
|
||||
test_error( err, "Unable to get thread group max size" );
|
||||
localThreads[1] = localThreads[0];
|
||||
if( localThreads[0] > threads[0] )
|
||||
localThreads[0] = threads[0];
|
||||
if( localThreads[1] > threads[1] )
|
||||
localThreads[1] = threads[1];
|
||||
#endif
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
|
||||
inptr = (void *)generateSignedImage( w * h * 4, d );
|
||||
else
|
||||
inptr = (void *)generateImage( w * h * 4, d );
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
if( ! inptr ){
|
||||
log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
dst = malloc( num_bytes );
|
||||
if( ! dst ){
|
||||
free( (void *)inptr );
|
||||
log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// allocate the input and output image memory objects
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
|
||||
if( memobjs[0] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
log_error("unable to create Image2D\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err );
|
||||
if( memobjs[1] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
log_error("unable to create array\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &code, name );
|
||||
if( err ){
|
||||
log_error( "Unable to create program and kernel\n" );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] );
|
||||
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] );
|
||||
if( err != CL_SUCCESS ){
|
||||
log_error( "clSetKernelArg failed\n" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
#else
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
#endif
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueNDRangeKernel failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = { 0, 0, 0 };
|
||||
size_t region[3] = { w, h, 1 };
|
||||
err = clEnqueueReadImage( queue, memobjs[0], false, origin, region, 0, 0, dst, 0, NULL, &readEvent );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clReadImage2D failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// This synchronization point is needed in order to assume the data is valid.
|
||||
// Getting profiling information is not a synchronization point.
|
||||
err = clWaitForEvents( 1, &readEvent );
|
||||
if( err != CL_SUCCESS )
|
||||
{
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h );
|
||||
if( err ){
|
||||
log_error( "Image failed to verify.\n" );
|
||||
}
|
||||
else{
|
||||
log_info( "Image verified.\n" );
|
||||
}
|
||||
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free(dst);
|
||||
free(inptr);
|
||||
|
||||
if (check_times(queueStart, submitStart, readStart, readEnd, device))
|
||||
err = -1;
|
||||
|
||||
return err;
|
||||
|
||||
} // end read_image()
|
||||
|
||||
|
||||
// Image-read profiling test for a CL_RGBA / CL_UNORM_INT8 image, filled by the
// float kernel (table entry 0).
int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format fmt;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // unsigned image data: 0 to 255
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_UNORM_INT8;

    return read_image( device, context, queue, numElements, readKernelCode[0], readKernelName[0], fmt );
}
|
||||
|
||||
|
||||
// Image-read profiling test for a CL_RGBA / CL_SIGNED_INT8 image, filled by the
// signed-integer kernel (table entry 1).
int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format fmt;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // signed image data: -128 to 127
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_SIGNED_INT8;

    return read_image( device, context, queue, numElements, readKernelCode[1], readKernelName[1], fmt );
}
|
||||
|
||||
|
||||
// Image-read profiling test for a CL_RGBA / CL_UNSIGNED_INT8 image, filled by
// the unsigned-integer kernel (table entry 2).
int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format fmt;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // unsigned image data: 0 to 255
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_UNSIGNED_INT8;

    return read_image( device, context, queue, numElements, readKernelCode[2], readKernelName[2], fmt );
}
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
//--- the code for the kernel executables
|
||||
// OpenCL C source for the three kernels used by the read-image tests. Every
// kernel handles one pixel per work-item: it reads four consecutive bytes
// from src starting at 4 * (y * image_width + x) and writes them to dstimg
// at (x, y) as one four-component colour.

// Float variant: the four bytes are converted to float and divided by 255
// before being stored with write_imagef.
static const char writeFloatKernelSource[] =
    "__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
    " color /= (float4)(255.f, 255.f, 255.f, 255.f);\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";

// Signed variant: the four chars are widened to int and stored with
// write_imagei.
static const char writeIntKernelSource[] =
    "__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " int4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color.x = (int)src[indx+0];\n"
    " color.y = (int)src[indx+1];\n"
    " color.z = (int)src[indx+2];\n"
    " color.w = (int)src[indx+3];\n"
    " write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";

// Unsigned variant: the four uchars are widened to uint and stored with
// write_imageui.
static const char writeUintKernelSource[] =
    "__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " uint4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color.x = (uint)src[indx+0];\n"
    " color.y = (uint)src[indx+1];\n"
    " color.z = (uint)src[indx+2];\n"
    " color.w = (uint)src[indx+3];\n"
    " write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";

// Kernel sources indexed in step with readKernelName:
// 0 = float, 1 = signed int, 2 = unsigned int.
static const char *readKernelCode[] = {
    writeFloatKernelSource,
    writeIntKernelSource,
    writeUintKernelSource
};
|
||||
|
||||
// Kernel entry-point names, indexed in step with readKernelCode.
static const char *readKernelName[] = {
    "testWritef",   // [0] float kernel
    "testWritei",   // [1] signed-integer kernel
    "testWriteui"   // [2] unsigned-integer kernel
};
|
||||
|
||||
|
||||
//--- helper functions
|
||||
// Allocates n bytes and fills each one with a genrand_int32( d ) result
// truncated to cl_uchar.
//
//   n  number of bytes to generate
//   d  random generator state handed to genrand_int32()
//
// Returns the new buffer, or NULL when the allocation fails. The caller owns
// the buffer and must release it with free().
static cl_uchar *generateImage( int n, MTdata d )
{
    // The cast on malloc is required because the build compiles this file as C++.
    cl_uchar *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) );
    int i;

    // Report allocation failure to the caller instead of writing through NULL.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (cl_uchar)genrand_int32( d );
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Allocates n bytes and fills each one with a genrand_int32( d ) result
// converted to char.
//
//   n  number of bytes to generate
//   d  random generator state handed to genrand_int32()
//
// Returns the new buffer, or NULL when the allocation fails. The caller owns
// the buffer and must release it with free().
static char *generateSignedImage( int n, MTdata d )
{
    // The cast on malloc is required because the build compiles this file as C++.
    char *ptr = (char *)malloc( n * sizeof( char ) );
    int i;

    // Report allocation failure to the caller instead of writing through NULL.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (char)genrand_int32( d );
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Compares the first w * h * 4 bytes of outptr against image.
// Returns 0 when all of them match, -1 at the first difference.
static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h )
{
    const int byteCount = w * h * 4;

    // With a non-positive count there is nothing to compare, which is a match.
    if( byteCount <= 0 )
        return 0;

    return ( memcmp( outptr, image, (size_t)byteCount ) == 0 ) ? 0 : -1;
}
|
||||
|
||||
|
||||
//----- the test functions
|
||||
int read_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements, const char *code, const char *name,
|
||||
cl_image_format image_format_desc )
|
||||
{
|
||||
cl_mem memobjs[2];
|
||||
cl_program program[1];
|
||||
void *inptr;
|
||||
void *dst = NULL;
|
||||
cl_kernel kernel[1];
|
||||
cl_event readEvent;
|
||||
cl_ulong queueStart, submitStart, readStart, readEnd;
|
||||
size_t threads[2];
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
size_t localThreads[2];
|
||||
#endif
|
||||
int err;
|
||||
int w = 64, h = 64;
|
||||
cl_mem_flags flags;
|
||||
size_t element_nbytes;
|
||||
size_t num_bytes;
|
||||
size_t channel_nbytes = sizeof( cl_uchar );
|
||||
MTdata d;
|
||||
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
element_nbytes = channel_nbytes * get_format_channel_count( &image_format_desc );
|
||||
num_bytes = w * h * element_nbytes;
|
||||
|
||||
threads[0] = (size_t)w;
|
||||
threads[1] = (size_t)h;
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
|
||||
test_error( err, "Unable to get thread group max size" );
|
||||
localThreads[1] = localThreads[0];
|
||||
if( localThreads[0] > threads[0] )
|
||||
localThreads[0] = threads[0];
|
||||
if( localThreads[1] > threads[1] )
|
||||
localThreads[1] = threads[1];
|
||||
#endif
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
|
||||
inptr = (void *)generateSignedImage( w * h * 4, d );
|
||||
else
|
||||
inptr = (void *)generateImage( w * h * 4, d );
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
if( ! inptr ){
|
||||
log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
dst = malloc( num_bytes );
|
||||
if( ! dst ){
|
||||
free( (void *)inptr );
|
||||
log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// allocate the input and output image memory objects
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
|
||||
if( memobjs[0] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
log_error("unable to create Image2D\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err );
|
||||
if( memobjs[1] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
log_error("unable to create array\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &code, name );
|
||||
if( err ){
|
||||
log_error( "Unable to create program and kernel\n" );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] );
|
||||
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] );
|
||||
if( err != CL_SUCCESS ){
|
||||
log_error( "clSetKernelArg failed\n" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
#else
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
#endif
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueNDRangeKernel failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = { 0, 0, 0 };
|
||||
size_t region[3] = { w, h, 1 };
|
||||
err = clEnqueueReadImage( queue, memobjs[0], false, origin, region, 0, 0, dst, 0, NULL, &readEvent );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clReadImage2D failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// This synchronization point is needed in order to assume the data is valid.
|
||||
// Getting profiling information is not a synchronization point.
|
||||
err = clWaitForEvents( 1, &readEvent );
|
||||
if( err != CL_SUCCESS )
|
||||
{
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h );
|
||||
if( err ){
|
||||
log_error( "Image failed to verify.\n" );
|
||||
}
|
||||
else{
|
||||
log_info( "Image verified.\n" );
|
||||
}
|
||||
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free(dst);
|
||||
free(inptr);
|
||||
|
||||
if (check_times(queueStart, submitStart, readStart, readEnd, device))
|
||||
err = -1;
|
||||
|
||||
return err;
|
||||
|
||||
} // end read_image()
|
||||
|
||||
|
||||
// Runs the read-image profiling test on a CL_RGBA / CL_UNORM_INT8 image.
// Forwards to read_image() with entry 0 of readKernelCode / readKernelName
// (the "testWritef" kernel). numElements is passed through unchanged.
// Returns whatever read_image() returns.
int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    // Normalized 8-bit channels; the testWritef kernel supplies float colours.
    cl_image_format normalizedRgba8 = { CL_RGBA, CL_UNORM_INT8 };

    // NOTE(review): macro is defined outside this file; presumably it leaves the
    // function early when the device has no image support - confirm.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    return read_image( device, context, queue, numElements,
                       readKernelCode[0], readKernelName[0], normalizedRgba8 );
}
|
||||
|
||||
|
||||
// Runs the read-image profiling test on a CL_RGBA / CL_SIGNED_INT8 image.
// Forwards to read_image() with entry 1 of readKernelCode / readKernelName
// (the "testWritei" kernel). numElements is passed through unchanged.
// Returns whatever read_image() returns.
int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    // Signed 8-bit channels: each component holds a value from -128 to 127.
    cl_image_format signedRgba8 = { CL_RGBA, CL_SIGNED_INT8 };

    // NOTE(review): macro is defined outside this file; presumably it leaves the
    // function early when the device has no image support - confirm.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    return read_image( device, context, queue, numElements,
                       readKernelCode[1], readKernelName[1], signedRgba8 );
}
|
||||
|
||||
|
||||
// Runs the read-image profiling test on a CL_RGBA / CL_UNSIGNED_INT8 image.
// Forwards to read_image() with entry 2 of readKernelCode / readKernelName
// (the "testWriteui" kernel). numElements is passed through unchanged.
// Returns whatever read_image() returns.
int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    // Unsigned 8-bit channels: each component holds a value from 0 to 255.
    cl_image_format unsignedRgba8 = { CL_RGBA, CL_UNSIGNED_INT8 };

    // NOTE(review): macro is defined outside this file; presumably it leaves the
    // function early when the device has no image support - confirm.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    return read_image( device, context, queue, numElements,
                       readKernelCode[2], readKernelName[2], unsignedRgba8 );
}
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user