allocations: Scale number of work-items in relation to input size (#2336)

Make the number of work-items proportional to the size of the allocation,
so that we launch more work-items and do less work per work-item as buffer
sizes grow with device capabilities.

To test:
test_allocations multiple 5 buffer
test_allocations single 5 buffer
This commit is contained in:
Sreelakshmi Haridas Maruthur
2025-04-01 10:53:21 -06:00
committed by GitHub
parent a429ce771e
commit 78bd3ddece

View File

@@ -26,6 +26,8 @@ typedef long long unsigned llu;
// Initial value of g_reduction_percentage.
#define REDUCTION_PERCENTAGE_DEFAULT 50
// Divisor applied to the target allocation size (g_max_size) in doTest to
// derive the number of work-items to launch.
#define BYTES_PER_WORK_ITEM 2048ULL
int g_repetition_count = 1;
// Percentage factor: doTest can rescale g_max_size to
// g_max_size * g_reduction_percentage / 100.
int g_reduction_percentage = REDUCTION_PERCENTAGE_DEFAULT;
int g_write_allocations = 1;
@@ -125,7 +127,7 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
int number_of_mems_used;
cl_ulong max_individual_allocation_size = g_max_individual_allocation_size;
cl_ulong global_mem_size = g_global_mem_size;
unsigned int number_of_work_items = 8192 * 32;
unsigned int number_of_work_items;
const bool allocate_image =
(alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING);
@@ -183,12 +185,16 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
g_reduction_percentage);
g_max_size = (size_t)((double)g_max_size
* (double)g_reduction_percentage / 100.0);
number_of_work_items = 8192 * 2;
}
// Round down to a whole number of MB (the mask clears the low 20 bits).
g_max_size &= (size_t)(0xFFFFFFFFFF00000ULL);
// Scale the number of work-items with the allocation size so that the
// number of bytes processed per work-item stays roughly constant, while
// never launching fewer than 8192 * 2 work-items.
number_of_work_items =
std::max(g_max_size / BYTES_PER_WORK_ITEM, 8192ULL * 2ULL);
log_info("** Target allocation size (rounded to nearest MB) is: %llu bytes "
"(%gMB).\n",
llu(g_max_size), toMB(g_max_size));