test allocations: restore small number of work items in case of reduction (#1932)

This commit is contained in:
Grzegorz Wawiorko
2024-05-21 17:46:14 +02:00
committed by GitHub
parent 4fceb78b93
commit b377b8537b
10 changed files with 1239 additions and 866 deletions

View File

@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -31,66 +31,86 @@ int g_multiple_allocations = 0;
int g_execute_kernel = 1;
static size_t g_max_size;
static RandomSeed g_seed( gRandomSeed );
static RandomSeed g_seed(gRandomSeed);
cl_long g_max_individual_allocation_size;
cl_long g_global_mem_size;
cl_uint checksum;
static void printUsage( const char *execName );
static void printUsage(const char *execName);
test_status init_cl( cl_device_id device ) {
test_status init_cl(cl_device_id device)
{
int error;
error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(g_max_individual_allocation_size), &g_max_individual_allocation_size, NULL );
if ( error ) {
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(g_max_individual_allocation_size),
&g_max_individual_allocation_size, NULL);
if (error)
{
print_error(error,
"clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
return TEST_FAIL;
}
error = clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(g_global_mem_size), &g_global_mem_size, NULL );
if ( error ) {
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
error =
clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(g_global_mem_size), &g_global_mem_size, NULL);
if (error)
{
print_error(error,
"clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
return TEST_FAIL;
}
log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
llu( g_max_individual_allocation_size ), toMB( g_max_individual_allocation_size ),
llu( g_global_mem_size ), toMB( g_global_mem_size ) );
log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), "
"CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
llu(g_max_individual_allocation_size),
toMB(g_max_individual_allocation_size), llu(g_global_mem_size),
toMB(g_global_mem_size));
if( g_global_mem_size > (cl_ulong)SIZE_MAX )
if (g_global_mem_size > (cl_ulong)SIZE_MAX)
{
g_global_mem_size = (cl_ulong)SIZE_MAX;
}
if( g_max_individual_allocation_size > g_global_mem_size )
if (g_max_individual_allocation_size > g_global_mem_size)
{
log_error( "FAILURE: CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n",
llu( g_max_individual_allocation_size ), llu( g_global_mem_size ) );
log_error("FAILURE: CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater "
"than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n",
llu(g_max_individual_allocation_size),
llu(g_global_mem_size));
return TEST_FAIL;
}
// We may need to back off the global_mem_size on unified memory devices to leave room for application and operating system code
// and associated data in the working set, so we dont start pathologically paging.
// Check to see if we are a unified memory device
// We may need to back off the global_mem_size on unified memory devices to
// leave room for application and operating system code and associated data
// in the working set, so we dont start pathologically paging. Check to see
// if we are a unified memory device
cl_bool hasUnifiedMemory = CL_FALSE;
if( ( error = clGetDeviceInfo( device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( hasUnifiedMemory ), &hasUnifiedMemory, NULL ) ) )
if ((error = clGetDeviceInfo(device, CL_DEVICE_HOST_UNIFIED_MEMORY,
sizeof(hasUnifiedMemory), &hasUnifiedMemory,
NULL)))
{
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
print_error(error,
"clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
return TEST_FAIL;
}
// we share unified memory so back off to 1/2 the global memory size.
if( CL_TRUE == hasUnifiedMemory )
if (CL_TRUE == hasUnifiedMemory)
{
g_global_mem_size -= g_global_mem_size /2;
log_info( "Device shares memory with the host, so backing off the maximum combined allocation size to be %gMB to avoid rampant paging.\n",
toMB( g_global_mem_size ) );
g_global_mem_size -= g_global_mem_size / 2;
log_info(
"Device shares memory with the host, so backing off the maximum "
"combined allocation size to be %gMB to avoid rampant paging.\n",
toMB(g_global_mem_size));
}
else
{
// Lets just use 60% of total available memory as framework/driver may not allow using all of it
// e.g. vram on GPU is used by window server and even for this test, we need some space for context,
// queue, kernel code on GPU.
// Lets just use 60% of total available memory as framework/driver may
// not allow using all of it e.g. vram on GPU is used by window server
// and even for this test, we need some space for context, queue, kernel
// code on GPU.
g_global_mem_size *= 0.60;
}
/* Cap the allocation size as the global size was deduced */
@@ -99,15 +119,16 @@ test_status init_cl( cl_device_id device ) {
g_max_individual_allocation_size = g_global_mem_size;
}
if( gReSeed )
if (gReSeed)
{
g_seed = RandomSeed( gRandomSeed );
g_seed = RandomSeed(gRandomSeed);
}
return TEST_PASS;
}
int doTest( cl_device_id device, cl_context context, cl_command_queue queue, AllocType alloc_type )
int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
AllocType alloc_type)
{
int error;
int failure_counts = 0;
@@ -116,117 +137,141 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All
cl_mem mems[MAX_NUMBER_TO_ALLOCATE];
int number_of_mems_used;
cl_ulong max_individual_allocation_size = g_max_individual_allocation_size;
cl_ulong global_mem_size = g_global_mem_size ;
cl_ulong global_mem_size = g_global_mem_size;
unsigned int number_of_work_itmes = 8192 * 32;
const bool allocate_image =
(alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING);
static const char* alloc_description[] = {
"buffer(s)",
"read-only image(s)",
"write-only image(s)",
"buffer(s)",
"read-only image(s)",
"write-only image(s)",
static const char *alloc_description[] = {
"buffer(s)", "read-only image(s)", "write-only image(s)",
"buffer(s)", "read-only image(s)", "write-only image(s)",
};
// Skip image tests if we don't support images on the device
if (allocate_image && checkForImageSupport(device))
{
log_info( "Can not test image allocation because device does not support images.\n" );
log_info("Can not test image allocation because device does not "
"support images.\n");
return 0;
}
// This section was added in order to fix a bug in the test
// If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT
// The test will fail in image allocations as the size requested for the allocation will be much grater than the maximum size allowed for image
// If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than
// CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT The test will
// fail in image allocations as the size requested for the allocation will
// be much grater than the maximum size allowed for image
if (allocate_image)
{
size_t max_width, max_height;
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH" );
error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH,
sizeof(max_width), &max_width, NULL);
test_error_abort(
error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH");
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT" );
error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
sizeof(max_height), &max_height, NULL);
test_error_abort(
error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT");
cl_ulong max_image2d_size = (cl_ulong)max_height * max_width * 4 * sizeof(cl_uint);
cl_ulong max_image2d_size =
(cl_ulong)max_height * max_width * 4 * sizeof(cl_uint);
if( max_individual_allocation_size > max_image2d_size )
if (max_individual_allocation_size > max_image2d_size)
{
max_individual_allocation_size = max_image2d_size;
}
}
// Pick the baseline size based on whether we are doing a single large or multiple allocations
g_max_size = g_multiple_allocations ? (size_t)global_mem_size : (size_t)max_individual_allocation_size;
// Pick the baseline size based on whether we are doing a single large or
// multiple allocations
g_max_size = g_multiple_allocations
? (size_t)global_mem_size
: (size_t)max_individual_allocation_size;
// Adjust based on the percentage
if( g_reduction_percentage != 100 )
if (g_reduction_percentage != 100)
{
log_info( "NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage );
g_max_size = (size_t)( (double)g_max_size * (double)g_reduction_percentage / 100.0 );
log_info("NOTE: reducing max allocations to %d%%.\n",
g_reduction_percentage);
g_max_size = (size_t)((double)g_max_size
* (double)g_reduction_percentage / 100.0);
number_of_work_itmes = 8192 * 2;
}
// Round to nearest MB.
g_max_size &= (size_t)(0xFFFFFFFFFF00000ULL);
log_info( "** Target allocation size (rounded to nearest MB) is: %llu bytes (%gMB).\n", llu( g_max_size ), toMB( g_max_size ) );
log_info( "** Allocating %s to size %gMB.\n", alloc_description[alloc_type], toMB( g_max_size ) );
log_info("** Target allocation size (rounded to nearest MB) is: %llu bytes "
"(%gMB).\n",
llu(g_max_size), toMB(g_max_size));
log_info("** Allocating %s to size %gMB.\n", alloc_description[alloc_type],
toMB(g_max_size));
for( int count = 0; count < g_repetition_count; count++ )
for (int count = 0; count < g_repetition_count; count++)
{
current_test_size = g_max_size;
error = FAILED_TOO_BIG;
log_info( " => Allocation %d\n", count + 1 );
log_info(" => Allocation %d\n", count + 1);
while( ( error == FAILED_TOO_BIG ) && ( current_test_size > g_max_size / 8 ) )
while ((error == FAILED_TOO_BIG)
&& (current_test_size > g_max_size / 8))
{
// Reset our checksum for each allocation
checksum = 0;
// Do the allocation
error = allocate_size( context, &queue, device, g_multiple_allocations, current_test_size, alloc_type,
mems, &number_of_mems_used, &final_size, g_write_allocations, g_seed );
error = allocate_size(context, &queue, device,
g_multiple_allocations, current_test_size,
alloc_type, mems, &number_of_mems_used,
&final_size, g_write_allocations, g_seed);
// If we succeeded and we're supposed to execute a kernel, do so.
if( error == SUCCEEDED && g_execute_kernel )
if (error == SUCCEEDED && g_execute_kernel)
{
log_info( "\tExecuting kernel with memory objects.\n" );
error = execute_kernel( context, &queue, device, alloc_type, mems, number_of_mems_used,
g_write_allocations );
log_info("\tExecuting kernel with memory objects.\n");
error =
execute_kernel(context, &queue, device, alloc_type, mems,
number_of_mems_used, g_write_allocations,
number_of_work_itmes);
}
// If we failed to allocate more than 1/8th of the requested amount return a failure.
if( final_size < (size_t)g_max_size / 8 )
// If we failed to allocate more than 1/8th of the requested amount
// return a failure.
if (final_size < (size_t)g_max_size / 8)
{
log_error( "===> Allocation %d failed to allocate more than 1/8th of the requested size.\n", count + 1 );
log_error("===> Allocation %d failed to allocate more than "
"1/8th of the requested size.\n",
count + 1);
failure_counts++;
}
// Clean up.
for( int i = 0; i < number_of_mems_used; i++ )
for (int i = 0; i < number_of_mems_used; i++)
{
clReleaseMemObject( mems[i] );
clReleaseMemObject(mems[i]);
}
if( error == FAILED_ABORT )
if (error == FAILED_ABORT)
{
log_error( " => Allocation %d failed.\n", count + 1 );
log_error(" => Allocation %d failed.\n", count + 1);
failure_counts++;
}
if( error == FAILED_TOO_BIG )
if (error == FAILED_TOO_BIG)
{
current_test_size -= g_max_size / 16;
log_info( "\tFailed at this size; trying a smaller size of %gMB.\n", toMB( current_test_size ) );
log_info(
"\tFailed at this size; trying a smaller size of %gMB.\n",
toMB(current_test_size));
}
}
if( error == SUCCEEDED && current_test_size == g_max_size )
if (error == SUCCEEDED && current_test_size == g_max_size)
{
log_info("\tPASS: Allocation succeeded.\n");
}
else if( error == SUCCEEDED && current_test_size > g_max_size / 8 )
else if (error == SUCCEEDED && current_test_size > g_max_size / 8)
{
log_info("\tPASS: Allocation succeeded at reduced size.\n");
}
@@ -240,41 +285,47 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All
return failure_counts;
}
int test_buffer(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
int test_buffer(cl_device_id device, cl_context context, cl_command_queue queue,
int num_elements)
{
return doTest( device, context, queue, BUFFER );
return doTest(device, context, queue, BUFFER);
}
int test_image2d_read(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
int test_image2d_read(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
return doTest( device, context, queue, IMAGE_READ );
return doTest(device, context, queue, IMAGE_READ);
}
int test_image2d_write(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
int test_image2d_write(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
return doTest( device, context, queue, IMAGE_WRITE );
return doTest(device, context, queue, IMAGE_WRITE);
}
int test_buffer_non_blocking(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
int test_buffer_non_blocking(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
return doTest( device, context, queue, BUFFER_NON_BLOCKING );
return doTest(device, context, queue, BUFFER_NON_BLOCKING);
}
int test_image2d_read_non_blocking(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
int test_image2d_read_non_blocking(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
return doTest( device, context, queue, IMAGE_READ_NON_BLOCKING );
return doTest(device, context, queue, IMAGE_READ_NON_BLOCKING);
}
int test_image2d_write_non_blocking(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
int test_image2d_write_non_blocking(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
return doTest( device, context, queue, IMAGE_WRITE_NON_BLOCKING );
return doTest(device, context, queue, IMAGE_WRITE_NON_BLOCKING);
}
test_definition test_list[] = {
ADD_TEST( buffer ),
ADD_TEST( image2d_read ),
ADD_TEST( image2d_write ),
ADD_TEST( buffer_non_blocking ),
ADD_TEST( image2d_read_non_blocking ),
ADD_TEST( image2d_write_non_blocking ),
ADD_TEST(buffer),
ADD_TEST(image2d_read),
ADD_TEST(image2d_write),
ADD_TEST(buffer_non_blocking),
ADD_TEST(image2d_read_non_blocking),
ADD_TEST(image2d_write_non_blocking),
};
const int test_num = ARRAY_SIZE( test_list );
const int test_num = ARRAY_SIZE(test_list);
int main(int argc, const char *argv[])
{
@@ -287,11 +338,11 @@ int main(int argc, const char *argv[])
return 1;
}
const char ** argList = (const char **)calloc( argc, sizeof( char*) );
const char **argList = (const char **)calloc(argc, sizeof(char *));
if( NULL == argList )
if (NULL == argList)
{
log_error( "Failed to allocate memory for argList array.\n" );
log_error("Failed to allocate memory for argList array.\n");
return 1;
}
@@ -299,38 +350,40 @@ int main(int argc, const char *argv[])
size_t argCount = 1;
// Parse arguments
for( int i = 1; i < argc; i++ )
for (int i = 1; i < argc; i++)
{
if( strcmp( argv[i], "multiple" ) == 0 )
if (strcmp(argv[i], "multiple") == 0)
g_multiple_allocations = 1;
else if( strcmp( argv[i], "single" ) == 0 )
else if (strcmp(argv[i], "single") == 0)
g_multiple_allocations = 0;
else if( ( r = (int)strtol( argv[i], &endPtr, 10 ) ) && ( endPtr != argv[i] ) && ( *endPtr == 0 ) )
else if ((r = (int)strtol(argv[i], &endPtr, 10)) && (endPtr != argv[i])
&& (*endPtr == 0))
{
// By spec, that means the entire string was an integer, so take it as a repetition count
// By spec, that means the entire string was an integer, so take it
// as a repetition count
g_repetition_count = r;
}
else if( strchr( argv[i], '%' ) != NULL )
else if (strchr(argv[i], '%') != NULL)
{
// Reduction percentage (let strtol ignore the percentage)
g_reduction_percentage = (int)strtol( argv[i], NULL, 10 );
g_reduction_percentage = (int)strtol(argv[i], NULL, 10);
}
else if( strcmp( argv[i], "do_not_force_fill" ) == 0 )
else if (strcmp(argv[i], "do_not_force_fill") == 0)
{
g_write_allocations = 0;
}
else if( strcmp( argv[i], "do_not_execute" ) == 0 )
else if (strcmp(argv[i], "do_not_execute") == 0)
{
g_execute_kernel = 0;
}
else if ( strcmp( argv[i], "--help" ) == 0 || strcmp( argv[i], "-h" ) == 0 )
else if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
{
printUsage( argv[0] );
printUsage(argv[0]);
free(argList);
return -1;
}
@@ -342,35 +395,42 @@ int main(int argc, const char *argv[])
}
}
int ret = runTestHarnessWithCheck( argCount, argList, test_num, test_list, false, 0, init_cl );
int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
false, 0, init_cl);
free(argList);
return ret;
}
void printUsage( const char *execName )
void printUsage(const char *execName)
{
const char *p = strrchr( execName, '/' );
if( p != NULL )
execName = p + 1;
const char *p = strrchr(execName, '/');
if (p != NULL) execName = p + 1;
log_info( "Usage: %s [options] [test_names]\n", execName );
log_info( "Options:\n" );
log_info( "\trandomize - Uses random seed\n" );
log_info( "\tsingle - Tests using a single allocation as large as possible\n" );
log_info( "\tmultiple - Tests using as many allocations as possible\n" );
log_info( "\n" );
log_info( "\tnumReps - Optional integer specifying the number of repetitions to run and average the result (defaults to 1)\n" );
log_info( "\treduction%% - Optional integer, followed by a %% sign, that acts as a multiplier for the target amount of memory.\n" );
log_info( "\t Example: target amount of 512MB and a reduction of 75%% will result in a target of 384MB.\n" );
log_info( "\n" );
log_info( "\tdo_not_force_fill - Disable explicitly write data to all memory objects after creating them.\n" );
log_info( "\t Without this, the kernel execution can not verify its checksum.\n" );
log_info( "\tdo_not_execute - Disable executing a kernel that accesses all of the memory objects.\n" );
log_info( "\n" );
log_info( "Test names (Allocation Types):\n" );
for( int i = 0; i < test_num; i++ )
log_info("Usage: %s [options] [test_names]\n", execName);
log_info("Options:\n");
log_info("\trandomize - Uses random seed\n");
log_info(
"\tsingle - Tests using a single allocation as large as possible\n");
log_info("\tmultiple - Tests using as many allocations as possible\n");
log_info("\n");
log_info("\tnumReps - Optional integer specifying the number of "
"repetitions to run and average the result (defaults to 1)\n");
log_info("\treduction%% - Optional integer, followed by a %% sign, that "
"acts as a multiplier for the target amount of memory.\n");
log_info("\t Example: target amount of 512MB and a reduction "
"of 75%% will result in a target of 384MB.\n");
log_info("\n");
log_info("\tdo_not_force_fill - Disable explicitly write data to all "
"memory objects after creating them.\n");
log_info("\t Without this, the kernel execution can not "
"verify its checksum.\n");
log_info("\tdo_not_execute - Disable executing a kernel that accesses all "
"of the memory objects.\n");
log_info("\n");
log_info("Test names (Allocation Types):\n");
for (int i = 0; i < test_num; i++)
{
log_info( "\t%s\n", test_list[i].name );
log_info("\t%s\n", test_list[i].name);
}
}