mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Fix double release of object in test_api and test_gl (#1287)
* Fix clang format only * Fix double release of objects
This commit is contained in:
committed by
GitHub
parent
b500da5fbc
commit
12637114ac
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -17,126 +17,126 @@
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl_gl.h>
|
||||
#if !defined(__APPLE__)
|
||||
#include <CL/cl_gl.h>
|
||||
#endif
|
||||
|
||||
static const char *bufferKernelPattern =
|
||||
"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n"
|
||||
" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n"
|
||||
"}\n";
|
||||
"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, "
|
||||
"__global %s%s *glDest )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n"
|
||||
" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n"
|
||||
"}\n";
|
||||
|
||||
#define TYPE_CASE( enum, type, range, offset ) \
|
||||
case enum: \
|
||||
{ \
|
||||
cl_##type *ptr = (cl_##type *)outData; \
|
||||
for( i = 0; i < count; i++ ) \
|
||||
ptr[ i ] = (cl_##type)( ( genrand_int32(d) & range ) - offset ); \
|
||||
break; \
|
||||
#define TYPE_CASE(enum, type, range, offset) \
|
||||
case enum: { \
|
||||
cl_##type *ptr = (cl_##type *)outData; \
|
||||
for (i = 0; i < count; i++) \
|
||||
ptr[i] = (cl_##type)((genrand_int32(d) & range) - offset); \
|
||||
break; \
|
||||
}
|
||||
|
||||
void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData )
|
||||
void gen_input_data(ExplicitType type, size_t count, MTdata d, void *outData)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
switch( type )
|
||||
switch (type)
|
||||
{
|
||||
case kBool:
|
||||
{
|
||||
case kBool: {
|
||||
bool *boolPtr = (bool *)outData;
|
||||
for( i = 0; i < count; i++ )
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
boolPtr[i] = ( genrand_int32(d) & 1 ) ? true : false;
|
||||
boolPtr[i] = (genrand_int32(d) & 1) ? true : false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
TYPE_CASE( kChar, char, 250, 127 )
|
||||
TYPE_CASE( kUChar, uchar, 250, 0 )
|
||||
TYPE_CASE( kShort, short, 65530, 32767 )
|
||||
TYPE_CASE( kUShort, ushort, 65530, 0 )
|
||||
TYPE_CASE( kInt, int, 0x0fffffff, 0x70000000 )
|
||||
TYPE_CASE( kUInt, uint, 0x0fffffff, 0 )
|
||||
TYPE_CASE(kChar, char, 250, 127)
|
||||
TYPE_CASE(kUChar, uchar, 250, 0)
|
||||
TYPE_CASE(kShort, short, 65530, 32767)
|
||||
TYPE_CASE(kUShort, ushort, 65530, 0)
|
||||
TYPE_CASE(kInt, int, 0x0fffffff, 0x70000000)
|
||||
TYPE_CASE(kUInt, uint, 0x0fffffff, 0)
|
||||
|
||||
case kLong:
|
||||
{
|
||||
case kLong: {
|
||||
cl_long *longPtr = (cl_long *)outData;
|
||||
for( i = 0; i < count; i++ )
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 );
|
||||
longPtr[i] = (cl_long)genrand_int32(d)
|
||||
| ((cl_ulong)genrand_int32(d) << 32);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case kULong:
|
||||
{
|
||||
case kULong: {
|
||||
cl_ulong *ulongPtr = (cl_ulong *)outData;
|
||||
for( i = 0; i < count; i++ )
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 );
|
||||
ulongPtr[i] = (cl_ulong)genrand_int32(d)
|
||||
| ((cl_ulong)genrand_int32(d) << 32);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case kFloat:
|
||||
{
|
||||
case kFloat: {
|
||||
cl_float *floatPtr = (float *)outData;
|
||||
for( i = 0; i < count; i++ )
|
||||
floatPtr[i] = get_random_float( -100000.f, 100000.f, d );
|
||||
for (i = 0; i < count; i++)
|
||||
floatPtr[i] = get_random_float(-100000.f, 100000.f, d);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
log_error( "ERROR: Invalid type passed in to generate_random_data!\n" );
|
||||
log_error(
|
||||
"ERROR: Invalid type passed in to generate_random_data!\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#define INC_CASE( enum, type ) \
|
||||
case enum: \
|
||||
{ \
|
||||
cl_##type *src = (cl_##type *)inData; \
|
||||
cl_##type *dst = (cl_##type *)outData; \
|
||||
*dst = *src + 1; \
|
||||
break; \
|
||||
#define INC_CASE(enum, type) \
|
||||
case enum: { \
|
||||
cl_##type *src = (cl_##type *)inData; \
|
||||
cl_##type *dst = (cl_##type *)outData; \
|
||||
*dst = *src + 1; \
|
||||
break; \
|
||||
}
|
||||
|
||||
void get_incremented_value( void *inData, void *outData, ExplicitType type )
|
||||
void get_incremented_value(void *inData, void *outData, ExplicitType type)
|
||||
{
|
||||
switch( type )
|
||||
switch (type)
|
||||
{
|
||||
INC_CASE( kChar, char )
|
||||
INC_CASE( kUChar, uchar )
|
||||
INC_CASE( kShort, short )
|
||||
INC_CASE( kUShort, ushort )
|
||||
INC_CASE( kInt, int )
|
||||
INC_CASE( kUInt, uint )
|
||||
INC_CASE( kLong, long )
|
||||
INC_CASE( kULong, ulong )
|
||||
INC_CASE( kFloat, float )
|
||||
default:
|
||||
break;
|
||||
INC_CASE(kChar, char)
|
||||
INC_CASE(kUChar, uchar)
|
||||
INC_CASE(kShort, short)
|
||||
INC_CASE(kUShort, ushort)
|
||||
INC_CASE(kInt, int)
|
||||
INC_CASE(kUInt, uint)
|
||||
INC_CASE(kLong, long)
|
||||
INC_CASE(kULong, ulong)
|
||||
INC_CASE(kFloat, float)
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d)
|
||||
int test_buffer_kernel(cl_context context, cl_command_queue queue,
|
||||
ExplicitType vecType, size_t vecSize, int numElements,
|
||||
int validate_only, MTdata d)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 3 ];
|
||||
clMemWrapper streams[3];
|
||||
size_t dataSize = numElements * 16 * sizeof(cl_long);
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ];
|
||||
cl_long inData[numElements * 16], outDataCL[numElements * 16],
|
||||
outDataGL[numElements * 16];
|
||||
#else
|
||||
cl_long* inData = (cl_long*)_malloca(dataSize);
|
||||
cl_long* outDataCL = (cl_long*)_malloca(dataSize);
|
||||
cl_long* outDataGL = (cl_long*)_malloca(dataSize);
|
||||
cl_long *inData = (cl_long *)_malloca(dataSize);
|
||||
cl_long *outDataCL = (cl_long *)_malloca(dataSize);
|
||||
cl_long *outDataGL = (cl_long *)_malloca(dataSize);
|
||||
#endif
|
||||
glBufferWrapper inGLBuffer, outGLBuffer;
|
||||
int i;
|
||||
int i;
|
||||
size_t bufferSize;
|
||||
|
||||
int error;
|
||||
@@ -146,210 +146,259 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
|
||||
char sizeName[4];
|
||||
|
||||
/* Create the source */
|
||||
if( vecSize == 1 )
|
||||
sizeName[ 0 ] = 0;
|
||||
if (vecSize == 1)
|
||||
sizeName[0] = 0;
|
||||
else
|
||||
sprintf( sizeName, "%d", (int)vecSize );
|
||||
sprintf(sizeName, "%d", (int)vecSize);
|
||||
|
||||
sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), sizeName );
|
||||
sprintf(kernelSource, bufferKernelPattern, get_explicit_type_name(vecType),
|
||||
sizeName, get_explicit_type_name(vecType), sizeName,
|
||||
get_explicit_type_name(vecType), sizeName,
|
||||
get_explicit_type_name(vecType), sizeName,
|
||||
get_explicit_type_name(vecType), sizeName);
|
||||
|
||||
/* Create kernels */
|
||||
programPtr = kernelSource;
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
|
||||
if (create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
(const char **)&programPtr, "sample_test"))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
bufferSize = numElements * vecSize * get_explicit_type_size( vecType );
|
||||
bufferSize = numElements * vecSize * get_explicit_type_size(vecType);
|
||||
|
||||
/* Generate some almost-random input data */
|
||||
gen_input_data( vecType, vecSize * numElements, d, inData );
|
||||
memset( outDataCL, 0, dataSize );
|
||||
memset( outDataGL, 0, dataSize );
|
||||
gen_input_data(vecType, vecSize * numElements, d, inData);
|
||||
memset(outDataCL, 0, dataSize);
|
||||
memset(outDataGL, 0, dataSize);
|
||||
|
||||
/* Generate some GL buffers to go against */
|
||||
glGenBuffers( 1, &inGLBuffer );
|
||||
glGenBuffers( 1, &outGLBuffer );
|
||||
glGenBuffers(1, &inGLBuffer);
|
||||
glGenBuffers(1, &outGLBuffer);
|
||||
|
||||
glBindBuffer( GL_ARRAY_BUFFER, inGLBuffer );
|
||||
glBufferData( GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW );
|
||||
glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW);
|
||||
|
||||
// Note: we need to bind the output buffer, even though we don't care about its values yet,
|
||||
// because CL needs it to get the buffer size
|
||||
glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer );
|
||||
glBufferData( GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW );
|
||||
// Note: we need to bind the output buffer, even though we don't care about
|
||||
// its values yet, because CL needs it to get the buffer size
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer( GL_ARRAY_BUFFER, 0 );
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glFinish();
|
||||
|
||||
|
||||
/* Generate some streams. The first and last ones are GL, middle one just vanilla CL */
|
||||
streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error );
|
||||
test_error( error, "Unable to create input GL buffer" );
|
||||
/* Generate some streams. The first and last ones are GL, middle one just
|
||||
* vanilla CL */
|
||||
streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_ONLY,
|
||||
inGLBuffer, &error);
|
||||
test_error(error, "Unable to create input GL buffer");
|
||||
|
||||
streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error );
|
||||
test_error( error, "Unable to create output CL buffer" );
|
||||
streams[1] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
|
||||
test_error(error, "Unable to create output CL buffer");
|
||||
|
||||
streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error );
|
||||
test_error( error, "Unable to create output GL buffer" );
|
||||
streams[2] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_WRITE_ONLY,
|
||||
outGLBuffer, &error);
|
||||
test_error(error, "Unable to create output GL buffer");
|
||||
|
||||
|
||||
/* Validate the info */
|
||||
if (validate_only) {
|
||||
int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) |
|
||||
CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) );
|
||||
for(i=0;i<3;i++)
|
||||
/* Validate the info */
|
||||
if (validate_only)
|
||||
{
|
||||
clReleaseMemObject(streams[i]);
|
||||
streams[i] = NULL;
|
||||
int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER,
|
||||
(GLuint)inGLBuffer, (GLenum)0, 0)
|
||||
| CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER,
|
||||
(GLuint)outGLBuffer, (GLenum)0, 0));
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
streams[i].reset();
|
||||
}
|
||||
|
||||
glDeleteBuffers(1, &inGLBuffer);
|
||||
inGLBuffer = 0;
|
||||
glDeleteBuffers(1, &outGLBuffer);
|
||||
outGLBuffer = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0;
|
||||
glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Assign streams and execute */
|
||||
for( int i = 0; i < 3; i++ )
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
error = clSetKernelArg( kernel, i, sizeof( streams[ i ] ), &streams[ i ] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
|
||||
test_error(error, "Unable to set kernel arguments");
|
||||
}
|
||||
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL);
|
||||
test_error( error, "Unable to acquire GL obejcts");
|
||||
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL);
|
||||
test_error( error, "Unable to acquire GL obejcts");
|
||||
error =
|
||||
(*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
|
||||
test_error(error, "Unable to acquire GL obejcts");
|
||||
error =
|
||||
(*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL);
|
||||
test_error(error, "Unable to acquire GL obejcts");
|
||||
|
||||
/* Run the kernel */
|
||||
threads[0] = numElements;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size(context, kernel, threads[0],
|
||||
&localThreads[0]);
|
||||
test_error(error, "Unable to get work group size to use");
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
|
||||
localThreads, 0, NULL, NULL);
|
||||
test_error(error, "Unable to execute test kernel");
|
||||
|
||||
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL );
|
||||
test_error(error, "clEnqueueReleaseGLObjects failed");
|
||||
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL );
|
||||
test_error(error, "clEnqueueReleaseGLObjects failed");
|
||||
error =
|
||||
(*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReleaseGLObjects failed");
|
||||
error =
|
||||
(*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReleaseGLObjects failed");
|
||||
|
||||
// Get the results from both CL and GL and make sure everything looks correct
|
||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output CL array!" );
|
||||
// Get the results from both CL and GL and make sure everything looks
|
||||
// correct
|
||||
error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize,
|
||||
outDataCL, 0, NULL, NULL);
|
||||
test_error(error, "Unable to read output CL array!");
|
||||
|
||||
glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer );
|
||||
void *glMem = glMapBuffer( GL_ARRAY_BUFFER, GL_READ_ONLY );
|
||||
memcpy( outDataGL, glMem, bufferSize );
|
||||
glUnmapBuffer( GL_ARRAY_BUFFER );
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
|
||||
void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
|
||||
memcpy(outDataGL, glMem, bufferSize);
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
|
||||
char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL;
|
||||
char *inP = (char *)inData, *glP = (char *)outDataGL,
|
||||
*clP = (char *)outDataCL;
|
||||
error = 0;
|
||||
for( size_t i = 0; i < numElements * vecSize; i++ )
|
||||
for (size_t i = 0; i < numElements * vecSize; i++)
|
||||
{
|
||||
cl_long expectedCLValue, expectedGLValue;
|
||||
get_incremented_value( inP, &expectedCLValue, vecType );
|
||||
get_incremented_value( &expectedCLValue, &expectedGLValue, vecType );
|
||||
get_incremented_value(inP, &expectedCLValue, vecType);
|
||||
get_incremented_value(&expectedCLValue, &expectedGLValue, vecType);
|
||||
|
||||
if( memcmp( clP, &expectedCLValue, get_explicit_type_size( vecType ) ) != 0 )
|
||||
if (memcmp(clP, &expectedCLValue, get_explicit_type_size(vecType)) != 0)
|
||||
{
|
||||
char scratch[ 64 ];
|
||||
log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i );
|
||||
log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) );
|
||||
log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) );
|
||||
log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) );
|
||||
char scratch[64];
|
||||
log_error(
|
||||
"ERROR: Data sample %d from the CL output did not validate!\n",
|
||||
(int)i);
|
||||
log_error("\t Input: %s\n",
|
||||
GetDataVectorString(inP, get_explicit_type_size(vecType),
|
||||
1, scratch));
|
||||
log_error("\tExpected: %s\n",
|
||||
GetDataVectorString(&expectedCLValue,
|
||||
get_explicit_type_size(vecType), 1,
|
||||
scratch));
|
||||
log_error("\t Actual: %s\n",
|
||||
GetDataVectorString(clP, get_explicit_type_size(vecType),
|
||||
1, scratch));
|
||||
error = -1;
|
||||
}
|
||||
|
||||
if( memcmp( glP, &expectedGLValue, get_explicit_type_size( vecType ) ) != 0 )
|
||||
if (memcmp(glP, &expectedGLValue, get_explicit_type_size(vecType)) != 0)
|
||||
{
|
||||
char scratch[ 64 ];
|
||||
log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i );
|
||||
log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) );
|
||||
log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) );
|
||||
log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) );
|
||||
char scratch[64];
|
||||
log_error(
|
||||
"ERROR: Data sample %d from the GL output did not validate!\n",
|
||||
(int)i);
|
||||
log_error("\t Input: %s\n",
|
||||
GetDataVectorString(inP, get_explicit_type_size(vecType),
|
||||
1, scratch));
|
||||
log_error("\tExpected: %s\n",
|
||||
GetDataVectorString(&expectedGLValue,
|
||||
get_explicit_type_size(vecType), 1,
|
||||
scratch));
|
||||
log_error("\t Actual: %s\n",
|
||||
GetDataVectorString(glP, get_explicit_type_size(vecType),
|
||||
1, scratch));
|
||||
error = -1;
|
||||
}
|
||||
|
||||
if( error )
|
||||
return error;
|
||||
if (error) return error;
|
||||
|
||||
inP += get_explicit_type_size( vecType );
|
||||
glP += get_explicit_type_size( vecType );
|
||||
clP += get_explicit_type_size( vecType );
|
||||
inP += get_explicit_type_size(vecType);
|
||||
glP += get_explicit_type_size(vecType);
|
||||
clP += get_explicit_type_size(vecType);
|
||||
}
|
||||
|
||||
for(i=0;i<3;i++)
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
clReleaseMemObject(streams[i]);
|
||||
streams[i] = NULL;
|
||||
streams[i].reset();
|
||||
}
|
||||
|
||||
glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0;
|
||||
glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0;
|
||||
glDeleteBuffers(1, &inGLBuffer);
|
||||
inGLBuffer = 0;
|
||||
glDeleteBuffers(1, &outGLBuffer);
|
||||
outGLBuffer = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
|
||||
int test_buffers(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int numElements)
|
||||
{
|
||||
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes };
|
||||
ExplicitType vecType[] = {
|
||||
kChar, kUChar, kShort, kUShort, kInt,
|
||||
kUInt, kLong, kULong, kFloat, kNumExplicitTypes
|
||||
};
|
||||
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
|
||||
unsigned int index, typeIndex;
|
||||
int retVal = 0;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
|
||||
for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
|
||||
for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
|
||||
{
|
||||
for( index = 0; vecSizes[ index ] != 0; index++ )
|
||||
for (index = 0; vecSizes[index] != 0; index++)
|
||||
{
|
||||
// Test!
|
||||
if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 )
|
||||
if (test_buffer_kernel(context, queue, vecType[typeIndex],
|
||||
vecSizes[index], numElements, 0, seed)
|
||||
!= 0)
|
||||
{
|
||||
char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
||||
log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] );
|
||||
char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8",
|
||||
"", "", "", "", "", "", "", "16" };
|
||||
log_error(" Buffer test %s%s FAILED\n",
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
sizeNames[vecSizes[index]]);
|
||||
retVal++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
|
||||
int test_buffers_getinfo(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int numElements)
|
||||
{
|
||||
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes };
|
||||
ExplicitType vecType[] = {
|
||||
kChar, kUChar, kShort, kUShort, kInt,
|
||||
kUInt, kLong, kULong, kFloat, kNumExplicitTypes
|
||||
};
|
||||
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
|
||||
unsigned int index, typeIndex;
|
||||
int retVal = 0;
|
||||
RandomSeed seed( gRandomSeed );
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
|
||||
for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
|
||||
for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
|
||||
{
|
||||
for( index = 0; vecSizes[ index ] != 0; index++ )
|
||||
for (index = 0; vecSizes[index] != 0; index++)
|
||||
{
|
||||
// Test!
|
||||
if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 )
|
||||
if (test_buffer_kernel(context, queue, vecType[typeIndex],
|
||||
vecSizes[index], numElements, 1, seed)
|
||||
!= 0)
|
||||
{
|
||||
char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
||||
log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] );
|
||||
char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8",
|
||||
"", "", "", "", "", "", "", "16" };
|
||||
log_error(" Buffer test %s%s FAILED\n",
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
sizeNames[vecSizes[index]]);
|
||||
retVal++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -17,7 +17,7 @@
|
||||
#include "gl/setup.h"
|
||||
#include "harness/genericThread.h"
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#if defined(__APPLE__)
|
||||
#include <OpenGL/glu.h>
|
||||
#else
|
||||
#include <GL/glu.h>
|
||||
@@ -40,112 +40,121 @@ typedef struct __GLsync *GLsync;
|
||||
#define APIENTRY
|
||||
#endif
|
||||
|
||||
typedef GLsync (APIENTRY *glFenceSyncPtr)(GLenum condition,GLbitfield flags);
|
||||
typedef GLsync(APIENTRY *glFenceSyncPtr)(GLenum condition, GLbitfield flags);
|
||||
glFenceSyncPtr glFenceSyncFunc;
|
||||
|
||||
typedef bool (APIENTRY *glIsSyncPtr)(GLsync sync);
|
||||
typedef bool(APIENTRY *glIsSyncPtr)(GLsync sync);
|
||||
glIsSyncPtr glIsSyncFunc;
|
||||
|
||||
typedef void (APIENTRY *glDeleteSyncPtr)(GLsync sync);
|
||||
typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync);
|
||||
glDeleteSyncPtr glDeleteSyncFunc;
|
||||
|
||||
typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
|
||||
typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags,
|
||||
GLuint64 timeout);
|
||||
glClientWaitSyncPtr glClientWaitSyncFunc;
|
||||
|
||||
typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
|
||||
typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags,
|
||||
GLuint64 timeout);
|
||||
glWaitSyncPtr glWaitSyncFunc;
|
||||
|
||||
typedef void (APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
|
||||
typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
|
||||
glGetInteger64vPtr glGetInteger64vFunc;
|
||||
|
||||
typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length,
|
||||
GLint *values);
|
||||
typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname,
|
||||
GLsizei bufSize, GLsizei *length,
|
||||
GLint *values);
|
||||
glGetSyncivPtr glGetSyncivFunc;
|
||||
|
||||
#define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError()))
|
||||
|
||||
static void InitSyncFns( void )
|
||||
static void InitSyncFns(void)
|
||||
{
|
||||
glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" );
|
||||
glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" );
|
||||
glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" );
|
||||
glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" );
|
||||
glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" );
|
||||
glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" );
|
||||
glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" );
|
||||
glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync");
|
||||
glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync");
|
||||
glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync");
|
||||
glClientWaitSyncFunc =
|
||||
(glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync");
|
||||
glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync");
|
||||
glGetInteger64vFunc =
|
||||
(glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v");
|
||||
glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv");
|
||||
}
|
||||
#ifndef GL_ARB_sync
|
||||
#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
|
||||
#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
|
||||
|
||||
#define GL_OBJECT_TYPE 0x9112
|
||||
#define GL_SYNC_CONDITION 0x9113
|
||||
#define GL_SYNC_STATUS 0x9114
|
||||
#define GL_SYNC_FLAGS 0x9115
|
||||
#define GL_OBJECT_TYPE 0x9112
|
||||
#define GL_SYNC_CONDITION 0x9113
|
||||
#define GL_SYNC_STATUS 0x9114
|
||||
#define GL_SYNC_FLAGS 0x9115
|
||||
|
||||
#define GL_SYNC_FENCE 0x9116
|
||||
#define GL_SYNC_FENCE 0x9116
|
||||
|
||||
#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
|
||||
#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
|
||||
|
||||
#define GL_UNSIGNALED 0x9118
|
||||
#define GL_SIGNALED 0x9119
|
||||
#define GL_UNSIGNALED 0x9118
|
||||
#define GL_SIGNALED 0x9119
|
||||
|
||||
#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
|
||||
#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
|
||||
|
||||
#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull
|
||||
#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull
|
||||
|
||||
#define GL_ALREADY_SIGNALED 0x911A
|
||||
#define GL_TIMEOUT_EXPIRED 0x911B
|
||||
#define GL_CONDITION_SATISFIED 0x911C
|
||||
#define GL_WAIT_FAILED 0x911D
|
||||
#define GL_ALREADY_SIGNALED 0x911A
|
||||
#define GL_TIMEOUT_EXPIRED 0x911B
|
||||
#define GL_CONDITION_SATISFIED 0x911C
|
||||
#define GL_WAIT_FAILED 0x911D
|
||||
#endif
|
||||
|
||||
#define USING_ARB_sync 1
|
||||
#endif
|
||||
|
||||
typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ;
|
||||
typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)(
|
||||
cl_context context, GLsync sync, cl_int *errCode_ret);
|
||||
|
||||
clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
|
||||
|
||||
|
||||
static const char *updateBuffersKernel[] = {
|
||||
"__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n"
|
||||
"__kernel void update( __global float4 * vertices, __global float4 "
|
||||
"*colors, int horizWrap, int rowIdx )\n"
|
||||
"{\n"
|
||||
" size_t tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" size_t xVal = ( tid & ( horizWrap - 1 ) );\n"
|
||||
" vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n"
|
||||
" vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n"
|
||||
" vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, "
|
||||
"1.f );\n"
|
||||
"\n"
|
||||
" int rowV = rowIdx + 1;\n"
|
||||
" colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
|
||||
" //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n"
|
||||
" colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 "
|
||||
") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
|
||||
" //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, "
|
||||
"1.0f, 1.0f, 1.0f );\n"
|
||||
" colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
|
||||
"}\n" };
|
||||
"}\n"
|
||||
};
|
||||
|
||||
//Passthrough VertexShader
|
||||
static const char *vertexshader =
|
||||
"#version 150\n"
|
||||
"uniform mat4 projMatrix;\n"
|
||||
"in vec4 inPosition;\n"
|
||||
"in vec4 inColor;\n"
|
||||
"out vec4 vertColor;\n"
|
||||
"void main (void) {\n"
|
||||
" gl_Position = projMatrix*inPosition;\n"
|
||||
" vertColor = inColor;\n"
|
||||
"}\n";
|
||||
// Passthrough VertexShader
|
||||
static const char *vertexshader = "#version 150\n"
|
||||
"uniform mat4 projMatrix;\n"
|
||||
"in vec4 inPosition;\n"
|
||||
"in vec4 inColor;\n"
|
||||
"out vec4 vertColor;\n"
|
||||
"void main (void) {\n"
|
||||
" gl_Position = projMatrix*inPosition;\n"
|
||||
" vertColor = inColor;\n"
|
||||
"}\n";
|
||||
|
||||
//Passthrough FragmentShader
|
||||
static const char *fragmentshader =
|
||||
"#version 150\n"
|
||||
"in vec4 vertColor;\n"
|
||||
"out vec4 outColor;\n"
|
||||
"void main (void) {\n"
|
||||
" outColor = vertColor;\n"
|
||||
"}\n";
|
||||
// Passthrough FragmentShader
|
||||
static const char *fragmentshader = "#version 150\n"
|
||||
"in vec4 vertColor;\n"
|
||||
"out vec4 outColor;\n"
|
||||
"void main (void) {\n"
|
||||
" outColor = vertColor;\n"
|
||||
"}\n";
|
||||
|
||||
GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
|
||||
{
|
||||
GLint logLength, status;
|
||||
GLint logLength, status;
|
||||
GLuint program = glCreateProgram();
|
||||
GLuint vpShader;
|
||||
|
||||
@@ -153,8 +162,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
|
||||
glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL);
|
||||
glCompileShader(vpShader);
|
||||
glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength);
|
||||
if (logLength > 0) {
|
||||
GLchar *log = (GLchar*) malloc(logLength);
|
||||
if (logLength > 0)
|
||||
{
|
||||
GLchar *log = (GLchar *)malloc(logLength);
|
||||
glGetShaderInfoLog(vpShader, logLength, &logLength, log);
|
||||
log_info("Vtx Shader compile log:\n%s", log);
|
||||
free(log);
|
||||
@@ -175,8 +185,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
|
||||
glCompileShader(fpShader);
|
||||
|
||||
glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength);
|
||||
if (logLength > 0) {
|
||||
GLchar *log = (GLchar*)malloc(logLength);
|
||||
if (logLength > 0)
|
||||
{
|
||||
GLchar *log = (GLchar *)malloc(logLength);
|
||||
glGetShaderInfoLog(fpShader, logLength, &logLength, log);
|
||||
log_info("Frag Shader compile log:\n%s", log);
|
||||
free(log);
|
||||
@@ -192,8 +203,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
|
||||
|
||||
glLinkProgram(program);
|
||||
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
|
||||
if (logLength > 0) {
|
||||
GLchar *log = (GLchar*)malloc(logLength);
|
||||
if (logLength > 0)
|
||||
{
|
||||
GLchar *log = (GLchar *)malloc(logLength);
|
||||
glGetProgramInfoLog(program, logLength, &logLength, log);
|
||||
log_info("Program link log:\n%s", log);
|
||||
free(log);
|
||||
@@ -219,7 +231,7 @@ void destroyShaderProgram(GLuint program)
|
||||
glUseProgram(0);
|
||||
glGetAttachedShaders(program, 2, &count, shaders);
|
||||
int i;
|
||||
for(i = 0; i < count; i++)
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
glDetachShader(program, shaders[i]);
|
||||
glDeleteShader(shaders[i]);
|
||||
@@ -227,44 +239,49 @@ void destroyShaderProgram(GLuint program)
|
||||
glDeleteProgram(program);
|
||||
}
|
||||
|
||||
// This function queues up and runs the above CL kernel that writes the vertex data
|
||||
cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1,
|
||||
cl_int rowIdx, cl_event fenceEvent, size_t numThreads )
|
||||
// This function queues up and runs the above CL kernel that writes the vertex
|
||||
// data
|
||||
cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
|
||||
cl_mem stream1, cl_int rowIdx, cl_event fenceEvent,
|
||||
size_t numThreads)
|
||||
{
|
||||
cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx);
|
||||
test_error(error, "Unable to set kernel arguments");
|
||||
|
||||
clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2;
|
||||
int numEvents = ( fenceEvent != NULL ) ? 1 : 0;
|
||||
cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL;
|
||||
int numEvents = (fenceEvent != NULL) ? 1 : 0;
|
||||
cl_event *fence_evt = (fenceEvent != NULL) ? &fenceEvent : NULL;
|
||||
|
||||
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 );
|
||||
test_error( error, "Unable to acquire GL obejcts");
|
||||
error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 );
|
||||
test_error( error, "Unable to acquire GL obejcts");
|
||||
error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents,
|
||||
fence_evt, &acqEvent1);
|
||||
test_error(error, "Unable to acquire GL obejcts");
|
||||
error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents,
|
||||
fence_evt, &acqEvent2);
|
||||
test_error(error, "Unable to acquire GL obejcts");
|
||||
|
||||
cl_event evts[ 2 ] = { acqEvent1, acqEvent2 };
|
||||
cl_event evts[2] = { acqEvent1, acqEvent2 };
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2,
|
||||
evts, &kernEvent);
|
||||
test_error(error, "Unable to execute test kernel");
|
||||
|
||||
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 );
|
||||
error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent,
|
||||
&relEvent1);
|
||||
test_error(error, "clEnqueueReleaseGLObjects failed");
|
||||
error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 );
|
||||
error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent,
|
||||
&relEvent2);
|
||||
test_error(error, "clEnqueueReleaseGLObjects failed");
|
||||
|
||||
evts[ 0 ] = relEvent1;
|
||||
evts[ 1 ] = relEvent2;
|
||||
error = clWaitForEvents( 2, evts );
|
||||
test_error( error, "Unable to wait for release events" );
|
||||
evts[0] = relEvent1;
|
||||
evts[1] = relEvent2;
|
||||
error = clWaitForEvents(2, evts);
|
||||
test_error(error, "Unable to wait for release events");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
class RunThread : public genericThread
|
||||
{
|
||||
class RunThread : public genericThread {
|
||||
public:
|
||||
|
||||
cl_kernel mKernel;
|
||||
cl_command_queue mQueue;
|
||||
cl_mem mStream0, mStream1;
|
||||
@@ -272,34 +289,40 @@ public:
|
||||
cl_event mFenceEvent;
|
||||
size_t mNumThreads;
|
||||
|
||||
RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads )
|
||||
: mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads )
|
||||
{
|
||||
}
|
||||
RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
|
||||
cl_mem stream1, size_t numThreads)
|
||||
: mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1),
|
||||
mNumThreads(numThreads)
|
||||
{}
|
||||
|
||||
void SetRunData( cl_int rowIdx, cl_event fenceEvent )
|
||||
void SetRunData(cl_int rowIdx, cl_event fenceEvent)
|
||||
{
|
||||
mRowIdx = rowIdx;
|
||||
mFenceEvent = fenceEvent;
|
||||
}
|
||||
|
||||
virtual void * IRun( void )
|
||||
virtual void *IRun(void)
|
||||
{
|
||||
cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
|
||||
cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1,
|
||||
mRowIdx, mFenceEvent, mNumThreads);
|
||||
return (void *)(uintptr_t)error;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device )
|
||||
int test_fence_sync_single(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, bool separateThreads,
|
||||
GLint rend_vs, GLint read_vs,
|
||||
cl_device_id rend_device)
|
||||
{
|
||||
int error;
|
||||
const int framebufferSize = 512;
|
||||
|
||||
|
||||
if( !is_extension_available( device, "cl_khr_gl_event" ) )
|
||||
if (!is_extension_available(device, "cl_khr_gl_event"))
|
||||
{
|
||||
log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
|
||||
log_info("NOTE: cl_khr_gl_event extension not present on this device; "
|
||||
"skipping fence sync test\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -312,10 +335,11 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
clGetPlatformIDs(0, NULL, &nplatforms);
|
||||
clGetPlatformIDs(1, &platform, NULL);
|
||||
|
||||
if (nplatforms > 1) {
|
||||
if (nplatforms > 1)
|
||||
{
|
||||
log_info("clGetPlatformIDs returned multiple values. This is not "
|
||||
"an error, but might result in obtaining incorrect function "
|
||||
"pointers if you do not want the first returned platform.\n");
|
||||
"an error, but might result in obtaining incorrect function "
|
||||
"pointers if you do not want the first returned platform.\n");
|
||||
|
||||
// Show them the platform name, in case it is a problem.
|
||||
|
||||
@@ -323,28 +347,35 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
char *name;
|
||||
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
|
||||
name = (char*)malloc(size);
|
||||
name = (char *)malloc(size);
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
|
||||
|
||||
log_info("Using platform with name: %s \n", name);
|
||||
free(name);
|
||||
}
|
||||
|
||||
clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR");
|
||||
if( clCreateEventFromGLsyncKHR_ptr == NULL )
|
||||
clCreateEventFromGLsyncKHR_ptr =
|
||||
(clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(
|
||||
platform, "clCreateEventFromGLsyncKHR");
|
||||
if (clCreateEventFromGLsyncKHR_ptr == NULL)
|
||||
{
|
||||
log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" );
|
||||
clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE");
|
||||
log_error("ERROR: Unable to run fence_sync test "
|
||||
"(clCreateEventFromGLsyncKHR function not discovered!)\n");
|
||||
clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)
|
||||
clGetExtensionFunctionAddressForPlatform(
|
||||
platform, "clCreateEventFromGLsyncAPPLE");
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USING_ARB_sync
|
||||
char *gl_version_str = (char*)glGetString( GL_VERSION );
|
||||
char *gl_version_str = (char *)glGetString(GL_VERSION);
|
||||
float glCoreVersion;
|
||||
sscanf(gl_version_str, "%f", &glCoreVersion);
|
||||
if( glCoreVersion < 3.0f )
|
||||
if (glCoreVersion < 3.0f)
|
||||
{
|
||||
log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion );
|
||||
log_info(
|
||||
"OpenGL version %f does not support fence/sync! Skipping test.\n",
|
||||
glCoreVersion);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -354,10 +385,13 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
GLint val, screen;
|
||||
CGLGetVirtualScreen(currCtx, &screen);
|
||||
CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val);
|
||||
if(val != kCGLOGLPVersion_3_2_Core)
|
||||
if (val != kCGLOGLPVersion_3_2_Core)
|
||||
{
|
||||
log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it"
|
||||
"OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion );
|
||||
log_error(
|
||||
"OpenGL context was not created with OpenGL version >= 3.0 profile "
|
||||
"even though platform supports it"
|
||||
"OpenGL profile %f does not support fence/sync! Skipping test.\n",
|
||||
glCoreVersion);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
@@ -365,7 +399,7 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
HDC hdc = wglGetCurrentDC();
|
||||
HGLRC hglrc = wglGetCurrentContext();
|
||||
#else
|
||||
Display* dpy = glXGetCurrentDisplay();
|
||||
Display *dpy = glXGetCurrentDisplay();
|
||||
GLXDrawable drawable = glXGetCurrentDrawable();
|
||||
GLXContext ctx = glXGetCurrentContext();
|
||||
#endif
|
||||
@@ -386,51 +420,66 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
|
||||
GLint posLoc, colLoc;
|
||||
GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc);
|
||||
if(!shaderprogram)
|
||||
if (!shaderprogram)
|
||||
{
|
||||
log_error("Failed to create shader program\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
float l = 0.0f; float r = framebufferSize;
|
||||
float b = 0.0f; float t = framebufferSize;
|
||||
float l = 0.0f;
|
||||
float r = framebufferSize;
|
||||
float b = 0.0f;
|
||||
float t = framebufferSize;
|
||||
|
||||
float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f,
|
||||
0.0f, 2.0f/(t-b), 0.0f, 0.0f,
|
||||
0.0f, 0.0f, -1.0f, 0.0f,
|
||||
-(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f
|
||||
};
|
||||
float projMatrix[16] = { 2.0f / (r - l),
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
2.0f / (t - b),
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
-1.0f,
|
||||
0.0f,
|
||||
-(r + l) / (r - l),
|
||||
-(t + b) / (t - b),
|
||||
0.0f,
|
||||
1.0f };
|
||||
|
||||
glUseProgram(shaderprogram);
|
||||
GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix");
|
||||
glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix);
|
||||
glUseProgram(0);
|
||||
|
||||
// Note: the framebuffer is just the target to verify our results against, so we don't
|
||||
// really care to go through all the possible formats in this case
|
||||
// Note: the framebuffer is just the target to verify our results against,
|
||||
// so we don't really care to go through all the possible formats in this
|
||||
// case
|
||||
glFramebufferWrapper glFramebuffer;
|
||||
glRenderbufferWrapper glRenderbuffer;
|
||||
error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT,
|
||||
GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
&glFramebuffer, &glRenderbuffer );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
error = CreateGLRenderbufferRaw(
|
||||
framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA,
|
||||
GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer);
|
||||
if (error != 0) return error;
|
||||
|
||||
GLuint vao;
|
||||
glGenVertexArrays(1, &vao);
|
||||
glBindVertexArray(vao);
|
||||
|
||||
glBufferWrapper vtxBuffer, colorBuffer;
|
||||
glGenBuffers( 1, &vtxBuffer );
|
||||
glGenBuffers( 1, &colorBuffer );
|
||||
glGenBuffers(1, &vtxBuffer);
|
||||
glGenBuffers(1, &colorBuffer);
|
||||
|
||||
const int numHorizVertices = ( framebufferSize * 64 ) + 1;
|
||||
const int numHorizVertices = (framebufferSize * 64) + 1;
|
||||
|
||||
glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
|
||||
glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
|
||||
NULL, GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
|
||||
glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
|
||||
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
|
||||
NULL, GL_STATIC_DRAW);
|
||||
|
||||
// Now that the requisite objects are bound, we can attempt program
|
||||
// validation:
|
||||
@@ -439,8 +488,9 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
|
||||
GLint logLength, status;
|
||||
glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength);
|
||||
if (logLength > 0) {
|
||||
GLchar *log = (GLchar*)malloc(logLength);
|
||||
if (logLength > 0)
|
||||
{
|
||||
GLchar *log = (GLchar *)malloc(logLength);
|
||||
glGetProgramInfoLog(shaderprogram, logLength, &logLength, log);
|
||||
log_info("Program validate log:\n%s", log);
|
||||
free(log);
|
||||
@@ -455,125 +505,131 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 2 ];
|
||||
clMemWrapper streams[2];
|
||||
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) )
|
||||
if (create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
updateBuffersKernel, "update"))
|
||||
return -1;
|
||||
|
||||
streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error );
|
||||
test_error( error, "Unable to create CL buffer from GL vertex buffer" );
|
||||
streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
|
||||
vtxBuffer, &error);
|
||||
test_error(error, "Unable to create CL buffer from GL vertex buffer");
|
||||
|
||||
streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error );
|
||||
test_error( error, "Unable to create CL buffer from GL color buffer" );
|
||||
streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
|
||||
colorBuffer, &error);
|
||||
test_error(error, "Unable to create CL buffer from GL color buffer");
|
||||
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
|
||||
test_error(error, "Unable to set kernel arguments");
|
||||
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
|
||||
test_error(error, "Unable to set kernel arguments");
|
||||
|
||||
cl_int horizWrap = (cl_int)framebufferSize;
|
||||
error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 2, sizeof(horizWrap), &horizWrap);
|
||||
test_error(error, "Unable to set kernel arguments");
|
||||
|
||||
glViewport( 0, 0, framebufferSize, framebufferSize );
|
||||
glClearColor( 0, 0, 0, 0 );
|
||||
glClear( GL_COLOR_BUFFER_BIT );
|
||||
glClear( GL_DEPTH_BUFFER_BIT );
|
||||
glDisable( GL_DEPTH_TEST );
|
||||
glEnable( GL_BLEND );
|
||||
glBlendFunc( GL_ONE, GL_ONE );
|
||||
glViewport(0, 0, framebufferSize, framebufferSize);
|
||||
glClearColor(0, 0, 0, 0);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glClear(GL_DEPTH_BUFFER_BIT);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glEnable(GL_BLEND);
|
||||
glBlendFunc(GL_ONE, GL_ONE);
|
||||
|
||||
clEventWrapper fenceEvent;
|
||||
GLsync glFence = 0;
|
||||
|
||||
// Do a loop through 8 different horizontal stripes against the framebuffer
|
||||
RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices );
|
||||
RunThread thread(kernel, queue, streams[0], streams[1],
|
||||
(size_t)numHorizVertices);
|
||||
|
||||
for( int i = 0; i < 8; i++ )
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
// if current rendering device is not the compute device and
|
||||
// separateThreads == false which means compute is going on same
|
||||
// thread and we are using implicit synchronization (no GLSync obj used)
|
||||
// then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need
|
||||
// to wait for rendering to finish on other device before CL can start
|
||||
// writing to CL/GL shared mem objects. When separateThreads is true i.e.
|
||||
// we are using GLSync obj to synchronize then we dont need to call glFinish
|
||||
// here since CL should wait for rendering on other device before this
|
||||
// GLSync object to finish before it starts writing to shared mem object.
|
||||
// Also rend_device == compute_device no need to call glFinish
|
||||
if(rend_device != device && !separateThreads)
|
||||
glFinish();
|
||||
// then glFlush by clEnqueueAcquireGLObject is not sufficient ... we
|
||||
// need to wait for rendering to finish on other device before CL can
|
||||
// start writing to CL/GL shared mem objects. When separateThreads is
|
||||
// true i.e. we are using GLSync obj to synchronize then we dont need to
|
||||
// call glFinish here since CL should wait for rendering on other device
|
||||
// before this GLSync object to finish before it starts writing to
|
||||
// shared mem object. Also rend_device == compute_device no need to call
|
||||
// glFinish
|
||||
if (rend_device != device && !separateThreads) glFinish();
|
||||
|
||||
if( separateThreads )
|
||||
if (separateThreads)
|
||||
{
|
||||
if (fenceEvent != NULL)
|
||||
{
|
||||
clReleaseEvent(fenceEvent);
|
||||
glDeleteSyncFunc(glFence);
|
||||
}
|
||||
glDeleteSyncFunc(glFence);
|
||||
|
||||
glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
|
||||
fenceEvent =
|
||||
clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
|
||||
test_error(error, "Unable to create CL event from GL fence");
|
||||
|
||||
// in case of explicit synchronization, we just wait for the sync object to complete
|
||||
// in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility
|
||||
// to flush on the context on which glSync is created
|
||||
// in case of explicit synchronization, we just wait for the sync
|
||||
// object to complete in clEnqueueAcquireGLObject but we dont flush.
|
||||
// Its application's responsibility to flush on the context on which
|
||||
// glSync is created
|
||||
glFlush();
|
||||
|
||||
thread.SetRunData( (cl_int)i, fenceEvent );
|
||||
thread.SetRunData((cl_int)i, fenceEvent);
|
||||
thread.Start();
|
||||
|
||||
error = (cl_int)(size_t)thread.Join();
|
||||
}
|
||||
else
|
||||
{
|
||||
error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices );
|
||||
error =
|
||||
run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i,
|
||||
fenceEvent, (size_t)numHorizVertices);
|
||||
}
|
||||
test_error( error, "Unable to run CL kernel" );
|
||||
test_error(error, "Unable to run CL kernel");
|
||||
|
||||
glUseProgram(shaderprogram);
|
||||
glEnableVertexAttribArray(posLoc);
|
||||
glEnableVertexAttribArray(colLoc);
|
||||
glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
|
||||
glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
|
||||
glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
|
||||
glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
|
||||
glBindBuffer( GL_ARRAY_BUFFER, 0 );
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
|
||||
glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE,
|
||||
4 * sizeof(GLfloat), 0);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
|
||||
glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE,
|
||||
4 * sizeof(GLfloat), 0);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
|
||||
glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 );
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2);
|
||||
|
||||
glDisableVertexAttribArray(posLoc);
|
||||
glDisableVertexAttribArray(colLoc);
|
||||
glUseProgram(0);
|
||||
|
||||
if( separateThreads )
|
||||
if (separateThreads)
|
||||
{
|
||||
// If we're on the same thread, then we're testing implicit syncing, so we
|
||||
// don't need the actual fence code
|
||||
if( fenceEvent != NULL )
|
||||
{
|
||||
clReleaseEvent( fenceEvent );
|
||||
glDeleteSyncFunc( glFence );
|
||||
}
|
||||
// If we're on the same thread, then we're testing implicit syncing,
|
||||
// so we don't need the actual fence code
|
||||
glDeleteSyncFunc(glFence);
|
||||
|
||||
glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
|
||||
fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error );
|
||||
test_error( error, "Unable to create CL event from GL fence" );
|
||||
|
||||
// in case of explicit synchronization, we just wait for the sync object to complete
|
||||
// in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility
|
||||
// to flush on the context on which glSync is created
|
||||
glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
fenceEvent =
|
||||
clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
|
||||
test_error(error, "Unable to create CL event from GL fence");
|
||||
|
||||
// in case of explicit synchronization, we just wait for the sync
|
||||
// object to complete in clEnqueueAcquireGLObject but we dont flush.
|
||||
// Its application's responsibility to flush on the context on which
|
||||
// glSync is created
|
||||
glFlush();
|
||||
}
|
||||
else
|
||||
glFinish();
|
||||
}
|
||||
|
||||
if( glFence != 0 )
|
||||
// Don't need the final release for fenceEvent, because the wrapper will take care of that
|
||||
glDeleteSyncFunc( glFence );
|
||||
if (glFence != 0)
|
||||
// Don't need the final release for fenceEvent, because the wrapper will
|
||||
// take care of that
|
||||
glDeleteSyncFunc(glFence);
|
||||
|
||||
#ifdef __APPLE__
|
||||
CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs);
|
||||
@@ -585,54 +641,62 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
#endif
|
||||
#endif
|
||||
// Grab the contents of the final framebuffer
|
||||
BufferOwningPtr<char> resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer,
|
||||
GL_COLOR_ATTACHMENT0_EXT,
|
||||
GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar,
|
||||
framebufferSize, 128 ) );
|
||||
BufferOwningPtr<char> resultData(ReadGLRenderbuffer(
|
||||
glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA,
|
||||
GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128));
|
||||
|
||||
// Check the contents now. We should end up with solid color bands 32 pixels high and the
|
||||
// full width of the framebuffer, at values (128,128,128) due to the additive blending
|
||||
for( int i = 0; i < 8; i++ )
|
||||
// Check the contents now. We should end up with solid color bands 32 pixels
|
||||
// high and the full width of the framebuffer, at values (128,128,128) due
|
||||
// to the additive blending
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
for( int y = 0; y < 4; y++ )
|
||||
for (int y = 0; y < 4; y++)
|
||||
{
|
||||
// Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass
|
||||
cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize;
|
||||
cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize;
|
||||
cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize;
|
||||
// Note: coverage will be double because the 63-0 triangle
|
||||
// overwrites again at the end of the pass
|
||||
cl_uchar valA =
|
||||
(((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize;
|
||||
cl_uchar valB =
|
||||
(((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize;
|
||||
cl_uchar valC =
|
||||
(((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize;
|
||||
|
||||
cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ];
|
||||
for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ )
|
||||
cl_uchar *row =
|
||||
(cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4];
|
||||
for (int x = 0; x < (framebufferSize - 1) - 1; x++)
|
||||
{
|
||||
if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) ||
|
||||
( row[ x * 4 + 2 ] != valC ) )
|
||||
if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB)
|
||||
|| (row[x * 4 + 2] != valC))
|
||||
{
|
||||
log_error( "ERROR: Output framebuffer did not validate!\n" );
|
||||
DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData );
|
||||
log_error( "RUNS:\n" );
|
||||
log_error("ERROR: Output framebuffer did not validate!\n");
|
||||
DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128,
|
||||
resultData);
|
||||
log_error("RUNS:\n");
|
||||
uint32_t *p = (uint32_t *)(char *)resultData;
|
||||
size_t a = 0;
|
||||
for( size_t t = 1; t < framebufferSize * framebufferSize; t++ )
|
||||
for (size_t t = 1; t < framebufferSize * framebufferSize;
|
||||
t++)
|
||||
{
|
||||
if( p[ a ] != 0 )
|
||||
if (p[a] != 0)
|
||||
{
|
||||
if( p[ t ] == 0 )
|
||||
if (p[t] == 0)
|
||||
{
|
||||
log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1,
|
||||
(int)( a % framebufferSize ), (int)( a / framebufferSize ),
|
||||
(int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ),
|
||||
p[ a ] );
|
||||
log_error(
|
||||
"RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n",
|
||||
a, t - 1, (int)(a % framebufferSize),
|
||||
(int)(a / framebufferSize),
|
||||
(int)((t - 1) % framebufferSize),
|
||||
(int)((t - 1) / framebufferSize), p[a]);
|
||||
a = t;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( p[ t ] != 0 )
|
||||
if (p[t] != 0)
|
||||
{
|
||||
a = t;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@@ -645,46 +709,56 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
|
||||
int test_fence_sync(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int numElements)
|
||||
{
|
||||
GLint vs_count = 0;
|
||||
cl_device_id *device_list = NULL;
|
||||
|
||||
if( !is_extension_available( device, "cl_khr_gl_event" ) )
|
||||
if (!is_extension_available(device, "cl_khr_gl_event"))
|
||||
{
|
||||
log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
|
||||
log_info("NOTE: cl_khr_gl_event extension not present on this device; "
|
||||
"skipping fence sync test\n");
|
||||
return 0;
|
||||
}
|
||||
#ifdef __APPLE__
|
||||
CGLContextObj ctx = CGLGetCurrentContext();
|
||||
CGLPixelFormatObj pix = CGLGetPixelFormat(ctx);
|
||||
CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
|
||||
CGLError err =
|
||||
CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
|
||||
|
||||
device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count);
|
||||
clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL);
|
||||
device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count);
|
||||
clGetGLContextInfoAPPLE(context, ctx,
|
||||
CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE,
|
||||
sizeof(cl_device_id) * vs_count, device_list, NULL);
|
||||
#else
|
||||
// Need platform specific way of getting devices from CL context to which OpenGL can render
|
||||
// If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES
|
||||
// Need platform specific way of getting devices from CL context to which
|
||||
// OpenGL can render If not available it can be replaced with
|
||||
// clGetContextInfo with CL_CONTEXT_DEVICES
|
||||
size_t device_cb;
|
||||
cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
|
||||
if( err != CL_SUCCESS )
|
||||
cl_int err =
|
||||
clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
print_error( err, "Unable to get device count from context" );
|
||||
return -1;
|
||||
print_error(err, "Unable to get device count from context");
|
||||
return -1;
|
||||
}
|
||||
vs_count = (GLint)device_cb / sizeof(cl_device_id);
|
||||
|
||||
if (vs_count < 1) {
|
||||
log_error("No devices found.\n");
|
||||
return -1;
|
||||
if (vs_count < 1)
|
||||
{
|
||||
log_error("No devices found.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
device_list = (cl_device_id *) malloc(device_cb);
|
||||
err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL);
|
||||
if( err != CL_SUCCESS ) {
|
||||
free(device_list);
|
||||
print_error( err, "Unable to get device list from context" );
|
||||
return -1;
|
||||
device_list = (cl_device_id *)malloc(device_cb);
|
||||
err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list,
|
||||
NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
free(device_list);
|
||||
print_error(err, "Unable to get device list from context");
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -695,30 +769,38 @@ int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue q
|
||||
|
||||
// Loop through all the devices capable to OpenGL rendering
|
||||
// and set them as current rendering target
|
||||
for(rend_vs = 0; rend_vs < vs_count; rend_vs++)
|
||||
for (rend_vs = 0; rend_vs < vs_count; rend_vs++)
|
||||
{
|
||||
// Loop through all the devices and set them as current
|
||||
// compute target
|
||||
for(read_vs = 0; read_vs < vs_count; read_vs++)
|
||||
for (read_vs = 0; read_vs < vs_count; read_vs++)
|
||||
{
|
||||
cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs];
|
||||
cl_device_id rend_device = device_list[rend_vs],
|
||||
read_device = device_list[read_vs];
|
||||
char rend_name[200], read_name[200];
|
||||
|
||||
clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL);
|
||||
clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL);
|
||||
clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name),
|
||||
rend_name, NULL);
|
||||
clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name),
|
||||
read_name, NULL);
|
||||
|
||||
log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name);
|
||||
error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device );
|
||||
log_info("Rendering on: %s, read back on: %s\n", rend_name,
|
||||
read_name);
|
||||
error = test_fence_sync_single(device, context, queue, false,
|
||||
rend_vs, read_vs, rend_device);
|
||||
any_failed |= error;
|
||||
if( error != 0 )
|
||||
log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" );
|
||||
if (error != 0)
|
||||
log_error(
|
||||
"ERROR: Implicit syncing with GL sync events failed!\n\n");
|
||||
else
|
||||
log_info("Implicit syncing Passed\n");
|
||||
|
||||
error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device );
|
||||
error = test_fence_sync_single(device, context, queue, true,
|
||||
rend_vs, read_vs, rend_device);
|
||||
any_failed |= error;
|
||||
if( error != 0 )
|
||||
log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" );
|
||||
if (error != 0)
|
||||
log_error(
|
||||
"ERROR: Explicit syncing with GL sync events failed!\n\n");
|
||||
else
|
||||
log_info("Explicit syncing Passed\n");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user