Fix double release of object in test_api and test_gl (#1287)

* Fix clang format only

* Fix double release of objects
This commit is contained in:
Grzegorz Wawiorko
2021-07-21 09:50:22 +02:00
committed by GitHub
parent b500da5fbc
commit 12637114ac
3 changed files with 575 additions and 449 deletions

View File

@@ -52,12 +52,7 @@ int test_context_destructor_callback(cl_device_id deviceID, cl_context context,
test_error(error, "Unable to set destructor callback"); test_error(error, "Unable to set destructor callback");
// Now release the context, which SHOULD call the callbacks // Now release the context, which SHOULD call the callbacks
error = clReleaseContext(localContext); localContext.reset();
test_error(error, "Unable to release local context");
// Note: since we manually released the context, we need to set it to NULL
// to prevent a double-release
localContext = NULL;
// At this point, all three callbacks should have already been called // At this point, all three callbacks should have already been called
int numErrors = 0; int numErrors = 0;

View File

@@ -22,7 +22,8 @@
#endif #endif
static const char *bufferKernelPattern = static const char *bufferKernelPattern =
"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n" "__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, "
"__global %s%s *glDest )\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" " clDest[ tid ] = source[ tid ] + (%s%s)(1);\n"
@@ -30,8 +31,7 @@ static const char *bufferKernelPattern =
"}\n"; "}\n";
#define TYPE_CASE(enum, type, range, offset) \ #define TYPE_CASE(enum, type, range, offset) \
case enum: \ case enum: { \
{ \
cl_##type *ptr = (cl_##type *)outData; \ cl_##type *ptr = (cl_##type *)outData; \
for (i = 0; i < count; i++) \ for (i = 0; i < count; i++) \
ptr[i] = (cl_##type)((genrand_int32(d) & range) - offset); \ ptr[i] = (cl_##type)((genrand_int32(d) & range) - offset); \
@@ -44,8 +44,7 @@ void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData )
switch (type) switch (type)
{ {
case kBool: case kBool: {
{
bool *boolPtr = (bool *)outData; bool *boolPtr = (bool *)outData;
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
{ {
@@ -61,28 +60,27 @@ void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData )
TYPE_CASE(kInt, int, 0x0fffffff, 0x70000000) TYPE_CASE(kInt, int, 0x0fffffff, 0x70000000)
TYPE_CASE(kUInt, uint, 0x0fffffff, 0) TYPE_CASE(kUInt, uint, 0x0fffffff, 0)
case kLong: case kLong: {
{
cl_long *longPtr = (cl_long *)outData; cl_long *longPtr = (cl_long *)outData;
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
{ {
longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); longPtr[i] = (cl_long)genrand_int32(d)
| ((cl_ulong)genrand_int32(d) << 32);
} }
break; break;
} }
case kULong: case kULong: {
{
cl_ulong *ulongPtr = (cl_ulong *)outData; cl_ulong *ulongPtr = (cl_ulong *)outData;
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
{ {
ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); ulongPtr[i] = (cl_ulong)genrand_int32(d)
| ((cl_ulong)genrand_int32(d) << 32);
} }
break; break;
} }
case kFloat: case kFloat: {
{
cl_float *floatPtr = (float *)outData; cl_float *floatPtr = (float *)outData;
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
floatPtr[i] = get_random_float(-100000.f, 100000.f, d); floatPtr[i] = get_random_float(-100000.f, 100000.f, d);
@@ -90,14 +88,14 @@ void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData )
} }
default: default:
log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); log_error(
"ERROR: Invalid type passed in to generate_random_data!\n");
break; break;
} }
} }
#define INC_CASE(enum, type) \ #define INC_CASE(enum, type) \
case enum: \ case enum: { \
{ \
cl_##type *src = (cl_##type *)inData; \ cl_##type *src = (cl_##type *)inData; \
cl_##type *dst = (cl_##type *)outData; \ cl_##type *dst = (cl_##type *)outData; \
*dst = *src + 1; \ *dst = *src + 1; \
@@ -117,19 +115,21 @@ void get_incremented_value( void *inData, void *outData, ExplicitType type )
INC_CASE(kLong, long) INC_CASE(kLong, long)
INC_CASE(kULong, ulong) INC_CASE(kULong, ulong)
INC_CASE(kFloat, float) INC_CASE(kFloat, float)
default: default: break;
break;
} }
} }
int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d) int test_buffer_kernel(cl_context context, cl_command_queue queue,
ExplicitType vecType, size_t vecSize, int numElements,
int validate_only, MTdata d)
{ {
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
clMemWrapper streams[3]; clMemWrapper streams[3];
size_t dataSize = numElements * 16 * sizeof(cl_long); size_t dataSize = numElements * 16 * sizeof(cl_long);
#if !(defined(_WIN32) && defined(_MSC_VER)) #if !(defined(_WIN32) && defined(_MSC_VER))
cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ]; cl_long inData[numElements * 16], outDataCL[numElements * 16],
outDataGL[numElements * 16];
#else #else
cl_long *inData = (cl_long *)_malloca(dataSize); cl_long *inData = (cl_long *)_malloca(dataSize);
cl_long *outDataCL = (cl_long *)_malloca(dataSize); cl_long *outDataCL = (cl_long *)_malloca(dataSize);
@@ -151,15 +151,16 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
else else
sprintf(sizeName, "%d", (int)vecSize); sprintf(sizeName, "%d", (int)vecSize);
sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName, sprintf(kernelSource, bufferKernelPattern, get_explicit_type_name(vecType),
get_explicit_type_name( vecType ), sizeName, sizeName, get_explicit_type_name(vecType), sizeName,
get_explicit_type_name(vecType), sizeName, get_explicit_type_name(vecType), sizeName,
get_explicit_type_name(vecType), sizeName, get_explicit_type_name(vecType), sizeName,
get_explicit_type_name(vecType), sizeName); get_explicit_type_name(vecType), sizeName);
/* Create kernels */ /* Create kernels */
programPtr = kernelSource; programPtr = kernelSource;
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) if (create_single_kernel_helper(context, &program, &kernel, 1,
(const char **)&programPtr, "sample_test"))
{ {
return -1; return -1;
} }
@@ -178,8 +179,8 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer); glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer);
glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW); glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW);
// Note: we need to bind the output buffer, even though we don't care about its values yet, // Note: we need to bind the output buffer, even though we don't care about
// because CL needs it to get the buffer size // its values yet, because CL needs it to get the buffer size
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW); glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW);
@@ -187,29 +188,37 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
glFinish(); glFinish();
/* Generate some streams. The first and last ones are GL, middle one just vanilla CL */ /* Generate some streams. The first and last ones are GL, middle one just
streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error ); * vanilla CL */
streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_ONLY,
inGLBuffer, &error);
test_error(error, "Unable to create input GL buffer"); test_error(error, "Unable to create input GL buffer");
streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error ); streams[1] =
clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
test_error(error, "Unable to create output CL buffer"); test_error(error, "Unable to create output CL buffer");
streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error ); streams[2] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_WRITE_ONLY,
outGLBuffer, &error);
test_error(error, "Unable to create output GL buffer"); test_error(error, "Unable to create output GL buffer");
/* Validate the info */ /* Validate the info */
if (validate_only) { if (validate_only)
int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | {
CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER,
(GLuint)inGLBuffer, (GLenum)0, 0)
| CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER,
(GLuint)outGLBuffer, (GLenum)0, 0));
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
clReleaseMemObject(streams[i]); streams[i].reset();
streams[i] = NULL;
} }
glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; glDeleteBuffers(1, &inGLBuffer);
glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; inGLBuffer = 0;
glDeleteBuffers(1, &outGLBuffer);
outGLBuffer = 0;
return result; return result;
} }
@@ -220,27 +229,35 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
test_error(error, "Unable to set kernel arguments"); test_error(error, "Unable to set kernel arguments");
} }
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); error =
(*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
test_error(error, "Unable to acquire GL obejcts"); test_error(error, "Unable to acquire GL obejcts");
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL); error =
(*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL);
test_error(error, "Unable to acquire GL obejcts"); test_error(error, "Unable to acquire GL obejcts");
/* Run the kernel */ /* Run the kernel */
threads[0] = numElements; threads[0] = numElements;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); error = get_max_common_work_group_size(context, kernel, threads[0],
&localThreads[0]);
test_error(error, "Unable to get work group size to use"); test_error(error, "Unable to get work group size to use");
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
localThreads, 0, NULL, NULL);
test_error(error, "Unable to execute test kernel"); test_error(error, "Unable to execute test kernel");
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL ); error =
(*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
test_error(error, "clEnqueueReleaseGLObjects failed"); test_error(error, "clEnqueueReleaseGLObjects failed");
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL ); error =
(*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL);
test_error(error, "clEnqueueReleaseGLObjects failed"); test_error(error, "clEnqueueReleaseGLObjects failed");
// Get the results from both CL and GL and make sure everything looks correct // Get the results from both CL and GL and make sure everything looks
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL ); // correct
error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize,
outDataCL, 0, NULL, NULL);
test_error(error, "Unable to read output CL array!"); test_error(error, "Unable to read output CL array!");
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
@@ -248,7 +265,8 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
memcpy(outDataGL, glMem, bufferSize); memcpy(outDataGL, glMem, bufferSize);
glUnmapBuffer(GL_ARRAY_BUFFER); glUnmapBuffer(GL_ARRAY_BUFFER);
char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL; char *inP = (char *)inData, *glP = (char *)outDataGL,
*clP = (char *)outDataCL;
error = 0; error = 0;
for (size_t i = 0; i < numElements * vecSize; i++) for (size_t i = 0; i < numElements * vecSize; i++)
{ {
@@ -259,25 +277,42 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
if (memcmp(clP, &expectedCLValue, get_explicit_type_size(vecType)) != 0) if (memcmp(clP, &expectedCLValue, get_explicit_type_size(vecType)) != 0)
{ {
char scratch[64]; char scratch[64];
log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i ); log_error(
log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); "ERROR: Data sample %d from the CL output did not validate!\n",
log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) ); (int)i);
log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) ); log_error("\t Input: %s\n",
GetDataVectorString(inP, get_explicit_type_size(vecType),
1, scratch));
log_error("\tExpected: %s\n",
GetDataVectorString(&expectedCLValue,
get_explicit_type_size(vecType), 1,
scratch));
log_error("\t Actual: %s\n",
GetDataVectorString(clP, get_explicit_type_size(vecType),
1, scratch));
error = -1; error = -1;
} }
if (memcmp(glP, &expectedGLValue, get_explicit_type_size(vecType)) != 0) if (memcmp(glP, &expectedGLValue, get_explicit_type_size(vecType)) != 0)
{ {
char scratch[64]; char scratch[64];
log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i ); log_error(
log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); "ERROR: Data sample %d from the GL output did not validate!\n",
log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) ); (int)i);
log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) ); log_error("\t Input: %s\n",
GetDataVectorString(inP, get_explicit_type_size(vecType),
1, scratch));
log_error("\tExpected: %s\n",
GetDataVectorString(&expectedGLValue,
get_explicit_type_size(vecType), 1,
scratch));
log_error("\t Actual: %s\n",
GetDataVectorString(glP, get_explicit_type_size(vecType),
1, scratch));
error = -1; error = -1;
} }
if( error ) if (error) return error;
return error;
inP += get_explicit_type_size(vecType); inP += get_explicit_type_size(vecType);
glP += get_explicit_type_size(vecType); glP += get_explicit_type_size(vecType);
@@ -286,19 +321,24 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
clReleaseMemObject(streams[i]); streams[i].reset();
streams[i] = NULL;
} }
glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; glDeleteBuffers(1, &inGLBuffer);
glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; inGLBuffer = 0;
glDeleteBuffers(1, &outGLBuffer);
outGLBuffer = 0;
return 0; return 0;
} }
int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) int test_buffers(cl_device_id device, cl_context context,
cl_command_queue queue, int numElements)
{ {
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; ExplicitType vecType[] = {
kChar, kUChar, kShort, kUShort, kInt,
kUInt, kLong, kULong, kFloat, kNumExplicitTypes
};
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
unsigned int index, typeIndex; unsigned int index, typeIndex;
int retVal = 0; int retVal = 0;
@@ -310,23 +350,31 @@ int test_buffers( cl_device_id device, cl_context context, cl_command_queue queu
for (index = 0; vecSizes[index] != 0; index++) for (index = 0; vecSizes[index] != 0; index++)
{ {
// Test! // Test!
if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 ) if (test_buffer_kernel(context, queue, vecType[typeIndex],
vecSizes[index], numElements, 0, seed)
!= 0)
{ {
char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8",
log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); "", "", "", "", "", "", "", "16" };
log_error(" Buffer test %s%s FAILED\n",
get_explicit_type_name(vecType[typeIndex]),
sizeNames[vecSizes[index]]);
retVal++; retVal++;
} }
} }
} }
return retVal; return retVal;
} }
int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) int test_buffers_getinfo(cl_device_id device, cl_context context,
cl_command_queue queue, int numElements)
{ {
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; ExplicitType vecType[] = {
kChar, kUChar, kShort, kUShort, kInt,
kUInt, kLong, kULong, kFloat, kNumExplicitTypes
};
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
unsigned int index, typeIndex; unsigned int index, typeIndex;
int retVal = 0; int retVal = 0;
@@ -338,18 +386,19 @@ int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_qu
for (index = 0; vecSizes[index] != 0; index++) for (index = 0; vecSizes[index] != 0; index++)
{ {
// Test! // Test!
if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 ) if (test_buffer_kernel(context, queue, vecType[typeIndex],
vecSizes[index], numElements, 1, seed)
!= 0)
{ {
char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8",
log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); "", "", "", "", "", "", "", "16" };
log_error(" Buffer test %s%s FAILED\n",
get_explicit_type_name(vecType[typeIndex]),
sizeNames[vecSizes[index]]);
retVal++; retVal++;
} }
} }
} }
return retVal; return retVal;
} }

View File

@@ -49,16 +49,19 @@ glIsSyncPtr glIsSyncFunc;
typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync); typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync);
glDeleteSyncPtr glDeleteSyncFunc; glDeleteSyncPtr glDeleteSyncFunc;
typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags,
GLuint64 timeout);
glClientWaitSyncPtr glClientWaitSyncFunc; glClientWaitSyncPtr glClientWaitSyncFunc;
typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags,
GLuint64 timeout);
glWaitSyncPtr glWaitSyncFunc; glWaitSyncPtr glWaitSyncFunc;
typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
glGetInteger64vPtr glGetInteger64vFunc; glGetInteger64vPtr glGetInteger64vFunc;
typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length, typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname,
GLsizei bufSize, GLsizei *length,
GLint *values); GLint *values);
glGetSyncivPtr glGetSyncivFunc; glGetSyncivPtr glGetSyncivFunc;
@@ -69,9 +72,11 @@ static void InitSyncFns( void )
glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync"); glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync");
glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync"); glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync");
glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync"); glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync");
glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" ); glClientWaitSyncFunc =
(glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync");
glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync"); glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync");
glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" ); glGetInteger64vFunc =
(glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v");
glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv"); glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv");
} }
#ifndef GL_ARB_sync #ifndef GL_ARB_sync
@@ -102,29 +107,34 @@ static void InitSyncFns( void )
#define USING_ARB_sync 1 #define USING_ARB_sync 1
#endif #endif
typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ; typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)(
cl_context context, GLsync sync, cl_int *errCode_ret);
clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr; clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
static const char *updateBuffersKernel[] = { static const char *updateBuffersKernel[] = {
"__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n" "__kernel void update( __global float4 * vertices, __global float4 "
"*colors, int horizWrap, int rowIdx )\n"
"{\n" "{\n"
" size_t tid = get_global_id(0);\n" " size_t tid = get_global_id(0);\n"
"\n" "\n"
" size_t xVal = ( tid & ( horizWrap - 1 ) );\n" " size_t xVal = ( tid & ( horizWrap - 1 ) );\n"
" vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n" " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n"
" vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n" " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, "
"1.f );\n"
"\n" "\n"
" int rowV = rowIdx + 1;\n" " int rowV = rowIdx + 1;\n"
" colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 "
" //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n" ") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
" //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, "
"1.0f, 1.0f, 1.0f );\n"
" colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n" " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
"}\n" }; "}\n"
};
// Passthrough VertexShader // Passthrough VertexShader
static const char *vertexshader = static const char *vertexshader = "#version 150\n"
"#version 150\n"
"uniform mat4 projMatrix;\n" "uniform mat4 projMatrix;\n"
"in vec4 inPosition;\n" "in vec4 inPosition;\n"
"in vec4 inColor;\n" "in vec4 inColor;\n"
@@ -135,8 +145,7 @@ static const char *vertexshader =
"}\n"; "}\n";
// Passthrough FragmentShader // Passthrough FragmentShader
static const char *fragmentshader = static const char *fragmentshader = "#version 150\n"
"#version 150\n"
"in vec4 vertColor;\n" "in vec4 vertColor;\n"
"out vec4 outColor;\n" "out vec4 outColor;\n"
"void main (void) {\n" "void main (void) {\n"
@@ -153,7 +162,8 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL); glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL);
glCompileShader(vpShader); glCompileShader(vpShader);
glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength); glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength);
if (logLength > 0) { if (logLength > 0)
{
GLchar *log = (GLchar *)malloc(logLength); GLchar *log = (GLchar *)malloc(logLength);
glGetShaderInfoLog(vpShader, logLength, &logLength, log); glGetShaderInfoLog(vpShader, logLength, &logLength, log);
log_info("Vtx Shader compile log:\n%s", log); log_info("Vtx Shader compile log:\n%s", log);
@@ -175,7 +185,8 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
glCompileShader(fpShader); glCompileShader(fpShader);
glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength); glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength);
if (logLength > 0) { if (logLength > 0)
{
GLchar *log = (GLchar *)malloc(logLength); GLchar *log = (GLchar *)malloc(logLength);
glGetShaderInfoLog(fpShader, logLength, &logLength, log); glGetShaderInfoLog(fpShader, logLength, &logLength, log);
log_info("Frag Shader compile log:\n%s", log); log_info("Frag Shader compile log:\n%s", log);
@@ -192,7 +203,8 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
glLinkProgram(program); glLinkProgram(program);
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
if (logLength > 0) { if (logLength > 0)
{
GLchar *log = (GLchar *)malloc(logLength); GLchar *log = (GLchar *)malloc(logLength);
glGetProgramInfoLog(program, logLength, &logLength, log); glGetProgramInfoLog(program, logLength, &logLength, log);
log_info("Program link log:\n%s", log); log_info("Program link log:\n%s", log);
@@ -227,9 +239,11 @@ void destroyShaderProgram(GLuint program)
glDeleteProgram(program); glDeleteProgram(program);
} }
// This function queues up and runs the above CL kernel that writes the vertex data // This function queues up and runs the above CL kernel that writes the vertex
cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, // data
cl_int rowIdx, cl_event fenceEvent, size_t numThreads ) cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
cl_mem stream1, cl_int rowIdx, cl_event fenceEvent,
size_t numThreads)
{ {
cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx); cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx);
test_error(error, "Unable to set kernel arguments"); test_error(error, "Unable to set kernel arguments");
@@ -238,19 +252,24 @@ cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
int numEvents = (fenceEvent != NULL) ? 1 : 0; int numEvents = (fenceEvent != NULL) ? 1 : 0;
cl_event *fence_evt = (fenceEvent != NULL) ? &fenceEvent : NULL; cl_event *fence_evt = (fenceEvent != NULL) ? &fenceEvent : NULL;
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 ); error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents,
fence_evt, &acqEvent1);
test_error(error, "Unable to acquire GL obejcts"); test_error(error, "Unable to acquire GL obejcts");
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 ); error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents,
fence_evt, &acqEvent2);
test_error(error, "Unable to acquire GL obejcts"); test_error(error, "Unable to acquire GL obejcts");
cl_event evts[2] = { acqEvent1, acqEvent2 }; cl_event evts[2] = { acqEvent1, acqEvent2 };
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent ); error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2,
evts, &kernEvent);
test_error(error, "Unable to execute test kernel"); test_error(error, "Unable to execute test kernel");
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 ); error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent,
&relEvent1);
test_error(error, "clEnqueueReleaseGLObjects failed"); test_error(error, "clEnqueueReleaseGLObjects failed");
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 ); error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent,
&relEvent2);
test_error(error, "clEnqueueReleaseGLObjects failed"); test_error(error, "clEnqueueReleaseGLObjects failed");
evts[0] = relEvent1; evts[0] = relEvent1;
@@ -261,10 +280,8 @@ cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
return 0; return 0;
} }
class RunThread : public genericThread class RunThread : public genericThread {
{
public: public:
cl_kernel mKernel; cl_kernel mKernel;
cl_command_queue mQueue; cl_command_queue mQueue;
cl_mem mStream0, mStream1; cl_mem mStream0, mStream1;
@@ -272,10 +289,11 @@ public:
cl_event mFenceEvent; cl_event mFenceEvent;
size_t mNumThreads; size_t mNumThreads;
RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads ) RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
: mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads ) cl_mem stream1, size_t numThreads)
{ : mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1),
} mNumThreads(numThreads)
{}
void SetRunData(cl_int rowIdx, cl_event fenceEvent) void SetRunData(cl_int rowIdx, cl_event fenceEvent)
{ {
@@ -285,13 +303,17 @@ public:
virtual void *IRun(void) virtual void *IRun(void)
{ {
cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads ); cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1,
mRowIdx, mFenceEvent, mNumThreads);
return (void *)(uintptr_t)error; return (void *)(uintptr_t)error;
} }
}; };
int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device ) int test_fence_sync_single(cl_device_id device, cl_context context,
cl_command_queue queue, bool separateThreads,
GLint rend_vs, GLint read_vs,
cl_device_id rend_device)
{ {
int error; int error;
const int framebufferSize = 512; const int framebufferSize = 512;
@@ -299,7 +321,8 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
if (!is_extension_available(device, "cl_khr_gl_event")) if (!is_extension_available(device, "cl_khr_gl_event"))
{ {
log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); log_info("NOTE: cl_khr_gl_event extension not present on this device; "
"skipping fence sync test\n");
return 0; return 0;
} }
@@ -312,7 +335,8 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
clGetPlatformIDs(0, NULL, &nplatforms); clGetPlatformIDs(0, NULL, &nplatforms);
clGetPlatformIDs(1, &platform, NULL); clGetPlatformIDs(1, &platform, NULL);
if (nplatforms > 1) { if (nplatforms > 1)
{
log_info("clGetPlatformIDs returned multiple values. This is not " log_info("clGetPlatformIDs returned multiple values. This is not "
"an error, but might result in obtaining incorrect function " "an error, but might result in obtaining incorrect function "
"pointers if you do not want the first returned platform.\n"); "pointers if you do not want the first returned platform.\n");
@@ -330,11 +354,16 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
free(name); free(name);
} }
clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR"); clCreateEventFromGLsyncKHR_ptr =
(clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(
platform, "clCreateEventFromGLsyncKHR");
if (clCreateEventFromGLsyncKHR_ptr == NULL) if (clCreateEventFromGLsyncKHR_ptr == NULL)
{ {
log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" ); log_error("ERROR: Unable to run fence_sync test "
clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE"); "(clCreateEventFromGLsyncKHR function not discovered!)\n");
clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)
clGetExtensionFunctionAddressForPlatform(
platform, "clCreateEventFromGLsyncAPPLE");
return -1; return -1;
} }
@@ -344,7 +373,9 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
sscanf(gl_version_str, "%f", &glCoreVersion); sscanf(gl_version_str, "%f", &glCoreVersion);
if (glCoreVersion < 3.0f) if (glCoreVersion < 3.0f)
{ {
log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion ); log_info(
"OpenGL version %f does not support fence/sync! Skipping test.\n",
glCoreVersion);
return 0; return 0;
} }
@@ -356,8 +387,11 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val); CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val);
if (val != kCGLOGLPVersion_3_2_Core) if (val != kCGLOGLPVersion_3_2_Core)
{ {
log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it" log_error(
"OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion ); "OpenGL context was not created with OpenGL version >= 3.0 profile "
"even though platform supports it"
"OpenGL profile %f does not support fence/sync! Skipping test.\n",
glCoreVersion);
return -1; return -1;
} }
#else #else
@@ -392,29 +426,42 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
return -1; return -1;
} }
float l = 0.0f; float r = framebufferSize; float l = 0.0f;
float b = 0.0f; float t = framebufferSize; float r = framebufferSize;
float b = 0.0f;
float t = framebufferSize;
float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f, float projMatrix[16] = { 2.0f / (r - l),
0.0f, 2.0f/(t-b), 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, -1.0f, 0.0f, 0.0f,
-(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f 0.0f,
}; 0.0f,
2.0f / (t - b),
0.0f,
0.0f,
0.0f,
0.0f,
-1.0f,
0.0f,
-(r + l) / (r - l),
-(t + b) / (t - b),
0.0f,
1.0f };
glUseProgram(shaderprogram); glUseProgram(shaderprogram);
GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix"); GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix");
glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix); glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix);
glUseProgram(0); glUseProgram(0);
// Note: the framebuffer is just the target to verify our results against, so we don't // Note: the framebuffer is just the target to verify our results against,
// really care to go through all the possible formats in this case // so we don't really care to go through all the possible formats in this
// case
glFramebufferWrapper glFramebuffer; glFramebufferWrapper glFramebuffer;
glRenderbufferWrapper glRenderbuffer; glRenderbufferWrapper glRenderbuffer;
error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, error = CreateGLRenderbufferRaw(
GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA,
&glFramebuffer, &glRenderbuffer ); GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer);
if( error != 0 ) if (error != 0) return error;
return error;
GLuint vao; GLuint vao;
glGenVertexArrays(1, &vao); glGenVertexArrays(1, &vao);
@@ -427,10 +474,12 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
const int numHorizVertices = (framebufferSize * 64) + 1; const int numHorizVertices = (framebufferSize * 64) + 1;
glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
NULL, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
NULL, GL_STATIC_DRAW);
// Now that the requisite objects are bound, we can attempt program // Now that the requisite objects are bound, we can attempt program
// validation: // validation:
@@ -439,7 +488,8 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
GLint logLength, status; GLint logLength, status;
glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength); glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength);
if (logLength > 0) { if (logLength > 0)
{
GLchar *log = (GLchar *)malloc(logLength); GLchar *log = (GLchar *)malloc(logLength);
glGetProgramInfoLog(shaderprogram, logLength, &logLength, log); glGetProgramInfoLog(shaderprogram, logLength, &logLength, log);
log_info("Program validate log:\n%s", log); log_info("Program validate log:\n%s", log);
@@ -457,13 +507,16 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
clKernelWrapper kernel; clKernelWrapper kernel;
clMemWrapper streams[2]; clMemWrapper streams[2];
if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) ) if (create_single_kernel_helper(context, &program, &kernel, 1,
updateBuffersKernel, "update"))
return -1; return -1;
streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error ); streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
vtxBuffer, &error);
test_error(error, "Unable to create CL buffer from GL vertex buffer"); test_error(error, "Unable to create CL buffer from GL vertex buffer");
streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error ); streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
colorBuffer, &error);
test_error(error, "Unable to create CL buffer from GL color buffer"); test_error(error, "Unable to create CL buffer from GL color buffer");
error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
@@ -488,38 +541,37 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
GLsync glFence = 0; GLsync glFence = 0;
// Do a loop through 8 different horizontal stripes against the framebuffer // Do a loop through 8 different horizontal stripes against the framebuffer
RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices ); RunThread thread(kernel, queue, streams[0], streams[1],
(size_t)numHorizVertices);
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
// if current rendering device is not the compute device and // if current rendering device is not the compute device and
// separateThreads == false which means compute is going on same // separateThreads == false which means compute is going on same
// thread and we are using implicit synchronization (no GLSync obj used) // thread and we are using implicit synchronization (no GLSync obj used)
// then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we
// to wait for rendering to finish on other device before CL can start // need to wait for rendering to finish on other device before CL can
// writing to CL/GL shared mem objects. When separateThreads is true i.e. // start writing to CL/GL shared mem objects. When separateThreads is
// we are using GLSync obj to synchronize then we dont need to call glFinish // true i.e. we are using GLSync obj to synchronize then we dont need to
// here since CL should wait for rendering on other device before this // call glFinish here since CL should wait for rendering on other device
// GLSync object to finish before it starts writing to shared mem object. // before this GLSync object to finish before it starts writing to
// Also rend_device == compute_device no need to call glFinish // shared mem object. Also rend_device == compute_device no need to call
if(rend_device != device && !separateThreads) // glFinish
glFinish(); if (rend_device != device && !separateThreads) glFinish();
if (separateThreads) if (separateThreads)
{ {
if (fenceEvent != NULL)
{
clReleaseEvent(fenceEvent);
glDeleteSyncFunc(glFence); glDeleteSyncFunc(glFence);
}
glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); fenceEvent =
clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
test_error(error, "Unable to create CL event from GL fence"); test_error(error, "Unable to create CL event from GL fence");
// in case of explicit synchronization, we just wait for the sync object to complete // in case of explicit synchronization, we just wait for the sync
// in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility // object to complete in clEnqueueAcquireGLObject but we dont flush.
// to flush on the context on which glSync is created // Its application's responsibility to flush on the context on which
// glSync is created
glFlush(); glFlush();
thread.SetRunData((cl_int)i, fenceEvent); thread.SetRunData((cl_int)i, fenceEvent);
@@ -529,7 +581,9 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
} }
else else
{ {
error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices ); error =
run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i,
fenceEvent, (size_t)numHorizVertices);
} }
test_error(error, "Unable to run CL kernel"); test_error(error, "Unable to run CL kernel");
@@ -537,9 +591,11 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
glEnableVertexAttribArray(posLoc); glEnableVertexAttribArray(posLoc);
glEnableVertexAttribArray(colLoc); glEnableVertexAttribArray(colLoc);
glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE,
4 * sizeof(GLfloat), 0);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE,
4 * sizeof(GLfloat), 0);
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2); glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2);
@@ -550,21 +606,20 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
if (separateThreads) if (separateThreads)
{ {
// If we're on the same thread, then we're testing implicit syncing, so we // If we're on the same thread, then we're testing implicit syncing,
// don't need the actual fence code // so we don't need the actual fence code
if( fenceEvent != NULL )
{
clReleaseEvent( fenceEvent );
glDeleteSyncFunc(glFence); glDeleteSyncFunc(glFence);
}
glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error ); fenceEvent =
clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
test_error(error, "Unable to create CL event from GL fence"); test_error(error, "Unable to create CL event from GL fence");
// in case of explicit synchronization, we just wait for the sync object to complete // in case of explicit synchronization, we just wait for the sync
// in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility // object to complete in clEnqueueAcquireGLObject but we dont flush.
// to flush on the context on which glSync is created // Its application's responsibility to flush on the context on which
// glSync is created
glFlush(); glFlush();
} }
else else
@@ -572,7 +627,8 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
} }
if (glFence != 0) if (glFence != 0)
// Don't need the final release for fenceEvent, because the wrapper will take care of that // Don't need the final release for fenceEvent, because the wrapper will
// take care of that
glDeleteSyncFunc(glFence); glDeleteSyncFunc(glFence);
#ifdef __APPLE__ #ifdef __APPLE__
@@ -585,43 +641,52 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
#endif #endif
#endif #endif
// Grab the contents of the final framebuffer // Grab the contents of the final framebuffer
BufferOwningPtr<char> resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, BufferOwningPtr<char> resultData(ReadGLRenderbuffer(
GL_COLOR_ATTACHMENT0_EXT, glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA,
GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128));
framebufferSize, 128 ) );
// Check the contents now. We should end up with solid color bands 32 pixels high and the // Check the contents now. We should end up with solid color bands 32 pixels
// full width of the framebuffer, at values (128,128,128) due to the additive blending // high and the full width of the framebuffer, at values (128,128,128) due
// to the additive blending
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
for (int y = 0; y < 4; y++) for (int y = 0; y < 4; y++)
{ {
// Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass // Note: coverage will be double because the 63-0 triangle
cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize; // overwrites again at the end of the pass
cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize; cl_uchar valA =
cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize; (((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize;
cl_uchar valB =
(((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize;
cl_uchar valC =
(((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize;
cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ]; cl_uchar *row =
(cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4];
for (int x = 0; x < (framebufferSize - 1) - 1; x++) for (int x = 0; x < (framebufferSize - 1) - 1; x++)
{ {
if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) || if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB)
( row[ x * 4 + 2 ] != valC ) ) || (row[x * 4 + 2] != valC))
{ {
log_error("ERROR: Output framebuffer did not validate!\n"); log_error("ERROR: Output framebuffer did not validate!\n");
DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData ); DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128,
resultData);
log_error("RUNS:\n"); log_error("RUNS:\n");
uint32_t *p = (uint32_t *)(char *)resultData; uint32_t *p = (uint32_t *)(char *)resultData;
size_t a = 0; size_t a = 0;
for( size_t t = 1; t < framebufferSize * framebufferSize; t++ ) for (size_t t = 1; t < framebufferSize * framebufferSize;
t++)
{ {
if (p[a] != 0) if (p[a] != 0)
{ {
if (p[t] == 0) if (p[t] == 0)
{ {
log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, log_error(
(int)( a % framebufferSize ), (int)( a / framebufferSize ), "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n",
(int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), a, t - 1, (int)(a % framebufferSize),
p[ a ] ); (int)(a / framebufferSize),
(int)((t - 1) % framebufferSize),
(int)((t - 1) / framebufferSize), p[a]);
a = t; a = t;
} }
} }
@@ -632,7 +697,6 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
a = t; a = t;
} }
} }
} }
return -1; return -1;
} }
@@ -645,28 +709,35 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
return 0; return 0;
} }
int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) int test_fence_sync(cl_device_id device, cl_context context,
cl_command_queue queue, int numElements)
{ {
GLint vs_count = 0; GLint vs_count = 0;
cl_device_id *device_list = NULL; cl_device_id *device_list = NULL;
if (!is_extension_available(device, "cl_khr_gl_event")) if (!is_extension_available(device, "cl_khr_gl_event"))
{ {
log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); log_info("NOTE: cl_khr_gl_event extension not present on this device; "
"skipping fence sync test\n");
return 0; return 0;
} }
#ifdef __APPLE__ #ifdef __APPLE__
CGLContextObj ctx = CGLGetCurrentContext(); CGLContextObj ctx = CGLGetCurrentContext();
CGLPixelFormatObj pix = CGLGetPixelFormat(ctx); CGLPixelFormatObj pix = CGLGetPixelFormat(ctx);
CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); CGLError err =
CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count); device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count);
clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL); clGetGLContextInfoAPPLE(context, ctx,
CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE,
sizeof(cl_device_id) * vs_count, device_list, NULL);
#else #else
// Need platform specific way of getting devices from CL context to which OpenGL can render // Need platform specific way of getting devices from CL context to which
// If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES // OpenGL can render If not available it can be replaced with
// clGetContextInfo with CL_CONTEXT_DEVICES
size_t device_cb; size_t device_cb;
cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); cl_int err =
clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
{ {
print_error(err, "Unable to get device count from context"); print_error(err, "Unable to get device count from context");
@@ -674,14 +745,17 @@ int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue q
} }
vs_count = (GLint)device_cb / sizeof(cl_device_id); vs_count = (GLint)device_cb / sizeof(cl_device_id);
if (vs_count < 1) { if (vs_count < 1)
{
log_error("No devices found.\n"); log_error("No devices found.\n");
return -1; return -1;
} }
device_list = (cl_device_id *)malloc(device_cb); device_list = (cl_device_id *)malloc(device_cb);
err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL); err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list,
if( err != CL_SUCCESS ) { NULL);
if (err != CL_SUCCESS)
{
free(device_list); free(device_list);
print_error(err, "Unable to get device list from context"); print_error(err, "Unable to get device list from context");
return -1; return -1;
@@ -701,24 +775,32 @@ int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue q
// compute target // compute target
for (read_vs = 0; read_vs < vs_count; read_vs++) for (read_vs = 0; read_vs < vs_count; read_vs++)
{ {
cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs]; cl_device_id rend_device = device_list[rend_vs],
read_device = device_list[read_vs];
char rend_name[200], read_name[200]; char rend_name[200], read_name[200];
clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL); clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name),
clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL); rend_name, NULL);
clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name),
read_name, NULL);
log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name); log_info("Rendering on: %s, read back on: %s\n", rend_name,
error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device ); read_name);
error = test_fence_sync_single(device, context, queue, false,
rend_vs, read_vs, rend_device);
any_failed |= error; any_failed |= error;
if (error != 0) if (error != 0)
log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" ); log_error(
"ERROR: Implicit syncing with GL sync events failed!\n\n");
else else
log_info("Implicit syncing Passed\n"); log_info("Implicit syncing Passed\n");
error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device ); error = test_fence_sync_single(device, context, queue, true,
rend_vs, read_vs, rend_device);
any_failed |= error; any_failed |= error;
if (error != 0) if (error != 0)
log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" ); log_error(
"ERROR: Explicit syncing with GL sync events failed!\n\n");
else else
log_info("Explicit syncing Passed\n"); log_info("Explicit syncing Passed\n");
} }