// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "testBase.h" #include "allocation_functions.h" #include "allocation_fill.h" #include "allocation_execute.h" #include "harness/testHarness.h" #include "harness/parseParameters.h" #include typedef long long unsigned llu; #define REDUCTION_PERCENTAGE_DEFAULT 50 #define BYTES_PER_WORK_ITEM 2048ULL int g_repetition_count = 1; int g_reduction_percentage = REDUCTION_PERCENTAGE_DEFAULT; int g_write_allocations = 1; int g_multiple_allocations = 0; int g_execute_kernel = 1; static size_t g_max_size; static RandomSeed g_seed(gRandomSeed); cl_long g_max_individual_allocation_size; cl_long g_global_mem_size; cl_uint checksum; static void printUsage(const char *execName); test_status init_cl(cl_device_id device) { int error; g_max_individual_allocation_size = get_device_info_max_mem_alloc_size(device); g_global_mem_size = get_device_info_global_mem_size(device); log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), " "CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n", llu(g_max_individual_allocation_size), toMB(g_max_individual_allocation_size), llu(g_global_mem_size), toMB(g_global_mem_size)); if (g_global_mem_size > (cl_ulong)SIZE_MAX) { g_global_mem_size = (cl_ulong)SIZE_MAX; } if (g_max_individual_allocation_size > g_global_mem_size) { log_error("FAILURE: CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater " "than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n", llu(g_max_individual_allocation_size), llu(g_global_mem_size)); return TEST_FAIL; } // We may need to back off the global_mem_size on unified memory devices to // leave room for application and operating system code and associated data // in the working set, so we dont start pathologically paging. Check to see // if we are a unified memory device cl_bool hasUnifiedMemory = CL_FALSE; if ((error = clGetDeviceInfo(device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(hasUnifiedMemory), &hasUnifiedMemory, NULL))) { print_error(error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY"); return TEST_FAIL; } // we share unified memory so back off to 1/2 the global memory size. if (CL_TRUE == hasUnifiedMemory) { g_global_mem_size -= g_global_mem_size / 2; log_info( "Device shares memory with the host, so backing off the maximum " "combined allocation size to be %gMB to avoid rampant paging.\n", toMB(g_global_mem_size)); } else { // Lets just use 60% of total available memory as framework/driver may // not allow using all of it e.g. vram on GPU is used by window server // and even for this test, we need some space for context, queue, kernel // code on GPU. g_global_mem_size *= 0.60; } /* Cap the allocation size as the global size was deduced */ if (g_max_individual_allocation_size > g_global_mem_size) { g_max_individual_allocation_size = g_global_mem_size; } if (gReSeed) { g_seed = RandomSeed(gRandomSeed); } return TEST_PASS; } int doTest(cl_device_id device, cl_context context, cl_command_queue queue, AllocType alloc_type) { int error; int failure_counts = 0; size_t final_size; size_t current_test_size; cl_mem mems[MAX_NUMBER_TO_ALLOCATE]; int number_of_mems_used; cl_ulong max_individual_allocation_size = g_max_individual_allocation_size; cl_ulong global_mem_size = g_global_mem_size; unsigned int number_of_work_items; const bool allocate_image = (alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING); static const char *alloc_description[] = { "buffer(s)", "read-only image(s)", "write-only image(s)", "buffer(s)", "read-only image(s)", "write-only image(s)", }; // Skip image tests if we don't support images on the device if (allocate_image && checkForImageSupport(device)) { log_info("Can not test image allocation because device does not " "support images.\n"); return 0; } // This section was added in order to fix a bug in the test // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than // CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT The test will // fail in image allocations as the size requested for the allocation will // be much grater than the maximum size allowed for image if (allocate_image) { size_t max_width, max_height; error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_width), &max_width, NULL); test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH"); error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_height), &max_height, NULL); test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT"); cl_ulong max_image2d_size = (cl_ulong)max_height * max_width * 4 * sizeof(cl_uint); if (max_individual_allocation_size > max_image2d_size) { max_individual_allocation_size = max_image2d_size; } } // Pick the baseline size based on whether we are doing a single large or // multiple allocations g_max_size = g_multiple_allocations ? (size_t)global_mem_size : (size_t)max_individual_allocation_size; // Adjust based on the percentage if (g_reduction_percentage != 100) { log_info("NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage); g_max_size = (size_t)((double)g_max_size * (double)g_reduction_percentage / 100.0); } // Round to nearest MB. g_max_size &= (size_t)(0xFFFFFFFFFF00000ULL); // Scales the number of work-items to keep the amount of bytes processed // per work-item the same. number_of_work_items = std::max(g_max_size / BYTES_PER_WORK_ITEM, 8192ULL * 2ULL); log_info("** Target allocation size (rounded to nearest MB) is: %llu bytes " "(%gMB).\n", llu(g_max_size), toMB(g_max_size)); log_info("** Allocating %s to size %gMB.\n", alloc_description[alloc_type], toMB(g_max_size)); for (int count = 0; count < g_repetition_count; count++) { current_test_size = g_max_size; error = FAILED_TOO_BIG; log_info(" => Allocation %d\n", count + 1); while ((error == FAILED_TOO_BIG) && (current_test_size > g_max_size / 8)) { // Reset our checksum for each allocation checksum = 0; // Do the allocation error = allocate_size(context, &queue, device, g_multiple_allocations, current_test_size, alloc_type, mems, &number_of_mems_used, &final_size, g_write_allocations, g_seed); // If we succeeded and we're supposed to execute a kernel, do so. if (error == SUCCEEDED && g_execute_kernel) { log_info("\tExecuting kernel with memory objects.\n"); error = execute_kernel(context, &queue, device, alloc_type, mems, number_of_mems_used, g_write_allocations, number_of_work_items); } // If we failed to allocate more than 1/8th of the requested amount // return a failure. if (final_size < (size_t)g_max_size / 8) { log_error("===> Allocation %d failed to allocate more than " "1/8th of the requested size.\n", count + 1); failure_counts++; } // Clean up. for (int i = 0; i < number_of_mems_used; i++) { clReleaseMemObject(mems[i]); } if (error == FAILED_ABORT) { log_error(" => Allocation %d failed.\n", count + 1); failure_counts++; } if (error == FAILED_TOO_BIG) { current_test_size -= g_max_size / 16; log_info( "\tFailed at this size; trying a smaller size of %gMB.\n", toMB(current_test_size)); } } if (error == SUCCEEDED && current_test_size == g_max_size) { log_info("\tPASS: Allocation succeeded.\n"); } else if (error == SUCCEEDED && current_test_size > g_max_size / 8) { log_info("\tPASS: Allocation succeeded at reduced size.\n"); } else { log_error("\tFAIL: Allocation failed.\n"); failure_counts++; } } return failure_counts; } REGISTER_TEST(buffer) { return doTest(device, context, queue, BUFFER); } REGISTER_TEST(image2d_read) { return doTest(device, context, queue, IMAGE_READ); } REGISTER_TEST(image2d_write) { return doTest(device, context, queue, IMAGE_WRITE); } REGISTER_TEST(buffer_non_blocking) { return doTest(device, context, queue, BUFFER_NON_BLOCKING); } REGISTER_TEST(image2d_read_non_blocking) { return doTest(device, context, queue, IMAGE_READ_NON_BLOCKING); } REGISTER_TEST(image2d_write_non_blocking) { return doTest(device, context, queue, IMAGE_WRITE_NON_BLOCKING); } int main(int argc, const char *argv[]) { char *endPtr; int r; argc = parseCustomParam(argc, argv); if (argc == -1) { return 1; } const char **argList = (const char **)calloc(argc, sizeof(char *)); if (NULL == argList) { log_error("Failed to allocate memory for argList array.\n"); return 1; } argList[0] = argv[0]; size_t argCount = 1; // Parse arguments for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "multiple") == 0) g_multiple_allocations = 1; else if (strcmp(argv[i], "single") == 0) g_multiple_allocations = 0; else if ((r = (int)strtol(argv[i], &endPtr, 10)) && (endPtr != argv[i]) && (*endPtr == 0)) { // By spec, that means the entire string was an integer, so take it // as a repetition count g_repetition_count = r; } else if (strchr(argv[i], '%') != NULL) { // Reduction percentage (let strtol ignore the percentage) g_reduction_percentage = (int)strtol(argv[i], NULL, 10); } else if (strcmp(argv[i], "do_not_force_fill") == 0) { g_write_allocations = 0; } else if (strcmp(argv[i], "do_not_execute") == 0) { g_execute_kernel = 0; } else if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { printUsage(argv[0]); free(argList); return -1; } else { argList[argCount] = argv[i]; argCount++; } } int ret = runTestHarnessWithCheck( argCount, argList, test_registry::getInstance().num_tests(), test_registry::getInstance().definitions(), false, 0, init_cl); free(argList); return ret; } void printUsage(const char *execName) { const char *p = strrchr(execName, '/'); if (p != NULL) execName = p + 1; log_info("Usage: %s [options] [test_names]\n", execName); log_info("Options:\n"); log_info("\trandomize - Uses random seed\n"); log_info( "\tsingle - Tests using a single allocation as large as possible\n"); log_info("\tmultiple - Tests using as many allocations as possible\n"); log_info("\n"); log_info("\tnumReps - Optional integer specifying the number of " "repetitions to run and average the result (defaults to 1)\n"); log_info("\treduction%% - Optional integer, followed by a %% sign, that " "acts as a multiplier for the target amount of memory.\n"); log_info("\t Example: target amount of 512MB and a reduction " "of 75%% will result in a target of 384MB.\n"); log_info("\n"); log_info("\tdo_not_force_fill - Disable explicitly write data to all " "memory objects after creating them.\n"); log_info("\t Without this, the kernel execution can not " "verify its checksum.\n"); log_info("\tdo_not_execute - Disable executing a kernel that accesses all " "of the memory objects.\n"); log_info("\n"); log_info("Test names (Allocation Types):\n"); for (int i = 0; i < test_registry::getInstance().num_tests(); i++) { log_info("\t%s\n", test_registry::getInstance().definitions()[i].name); } }