From 16a75dc0af2e0c55d27a91ffefd0aa1b97b3f484 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Jun 2023 17:41:39 +0200 Subject: [PATCH] Added cl_khr_fp16 extension support for test_vector_creation from basic (#1728) * Added cl_khr_fp16 extension support for vector_creation test from basic * Added corrections related to vendor's review * Added protection to avoid similar creation cases * Added comment for recent correction * cosmetics * Corrected factor array to restore lost capacity of original test.. leaving only 16-sizes vector tests limited. --- .../basic/test_vector_creation.cpp | 485 +++++++++++------- 1 file changed, 292 insertions(+), 193 deletions(-) diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index d9530b4e..801c72b1 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,48 +17,41 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include - - +#include #define DEBUG 0 #define DEPTH 16 // Limit the maximum code size for any given kernel. -#define MAX_CODE_SIZE (1024*32) +#define MAX_CODE_SIZE (1024 * 32) -const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1}; -const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"}; +static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 }; +static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 }; +static const char *size_names[] = { "", "2", "3", "4", "8", + "16", "!!a", "!!b", "!!c", "!!d" }; +static char extension[128] = { 0 }; -// Creates a kernel by enumerating all possible ways of building the vector out of vloads -// skip_to_results will skip results up to a given number. If the amount of code generated -// is greater than MAX_CODE_SIZE, this function will return the number of results used, -// which can then be used as the skip_to_result value to continue where it left off. -int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) { +// Creates a kernel by enumerating all possible ways of building the vector out +// of vloads skip_to_results will skip results up to a given number. If the +// amount of code generated is greater than MAX_CODE_SIZE, this function will +// return the number of results used, which can then be used as the +// skip_to_result value to continue where it left off. +int create_kernel(ExplicitType type, int output_size, char *program, + int *number_of_results, int skip_to_result) +{ int number_of_sizes; - switch (output_size) { - case 1: - number_of_sizes = 1; - break; - case 2: - number_of_sizes = 2; - break; - case 3: - number_of_sizes = 3; - break; - case 4: - number_of_sizes = 4; - break; - case 8: - number_of_sizes = 5; - break; - case 16: - number_of_sizes = 6; - break; - default: - log_error("Invalid size: %d\n", output_size); - return -1; + switch (output_size) + { + case 1: number_of_sizes = 1; break; + case 2: number_of_sizes = 2; break; + case 3: number_of_sizes = 3; break; + case 4: number_of_sizes = 4; break; + case 8: number_of_sizes = 5; break; + case 16: number_of_sizes = 6; break; + default: log_error("Invalid size: %d\n", output_size); return -1; } int total_results = 0; @@ -67,102 +60,125 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number int total_program_length = 0; int aborted_due_to_size = 0; - if (skip_to_result < 0) - skip_to_result = 0; + if (skip_to_result < 0) skip_to_result = 0; // The line of code for the vector creation char line[1024]; - // Keep track of what size vector we are using in each position so we can iterate through all fo them + // Keep track of what size vector we are using in each position so we can + // iterate through all fo them int pos[DEPTH]; int max_size = output_size; if (DEBUG > 1) log_info("max_size: %d\n", max_size); program[0] = '\0'; - sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n", - type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]); + sprintf(program, + "%s\n__kernel void test_vector_creation(__global %s *src, __global " + "%s%s *result) {\n", + extension, get_explicit_type_name(type), + get_explicit_type_name(type), + (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]); total_program_length += (int)strlen(program); - char storePrefix[ 128 ], storeSuffix[ 128 ]; + char storePrefix[128], storeSuffix[128]; - // Start out trying sizes 1,1,1,1,1... - for (int i=0; i 1) { + while (!done) + { + if (DEBUG > 1) + { log_info("pos size[] = ["); - for (int k=0; k 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); + if (DEBUG > 1) + log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); - // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations + // If they did not fit the required size exactly it is too long, so + // there is no point in checking any other combinations // of the sizes to the right. Prune them from the search. - if (size_so_far != max_size) { + if (size_so_far != max_size) + { // Zero all the sizes to the right - for (int k=vloads+1; k=0; d--) { + for (int d = vloads; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over then we are done done = 1; break; } - } else { + } + else + { break; } } - // Go on to the next size since this one (and all others "under" it) didn't fit + // Go on to the next size since this one (and all others "under" it) + // didn't fit continue; } // Generate the actual load line if we are building this part - line[0]= '\0'; - if (skip_to_result == 0 || total_results >= skip_to_result) { - if( number_of_sizes == 3 ) + line[0] = '\0'; + if (skip_to_result == 0 || total_results >= skip_to_result) + { + if (number_of_sizes == 3) { - sprintf( storePrefix, "vstore3( " ); - sprintf( storeSuffix, ", %d, result )", current_result ); + sprintf(storePrefix, "vstore3( "); + sprintf(storeSuffix, ", %d, result )", current_result); } else { - sprintf( storePrefix, "result[%d] = ", current_result ); - storeSuffix[ 0 ] = 0; + sprintf(storePrefix, "result[%d] = ", current_result); + storeSuffix[0] = 0; } - sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size); + sprintf(line, "\t%s(%s%d)(", storePrefix, + get_explicit_type_name(type), output_size); current_result++; int offset = 0; - for (int i=0; i MAX_CODE_SIZE) { + if (total_program_length > MAX_CODE_SIZE) + { aborted_due_to_size = 1; done = 1; } @@ -179,132 +196,194 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number if (DEBUG) log_info("line is: %s", line); - // If we did not use all of them, then we ignore any changes further to the right. - // We do this by causing those loops to skip on the next iteration. - if (vloads < DEPTH) { + // If we did not use all of them, then we ignore any changes further to + // the right. We do this by causing those loops to skip on the next + // iteration. + if (vloads < DEPTH) + { if (DEBUG > 1) log_info("done with this depth\n"); - for (int k=vloads; k=0; d--) { + for (int d = DEPTH - 1; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over at the far-left then we are done done = 1; break; } - } else { + } + else + { break; } } - if (done) - break; + if (done) break; // Continue until we are done. } - strcat(program, "}\n\n"); //log_info("%s\n", program); + strcat(program, "}\n\n"); // log_info("%s\n", program); total_program_length += 3; - if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n", - get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads); + if (DEBUG) + log_info( + "\t\t(Program for vector type %s%s contains %d vector creations, " + "of total program length %gkB, with a total of %d vloads.)\n", + get_explicit_type_name(type), size_names[number_of_sizes - 1], + total_results, total_program_length / 1024.0, total_vloads); *number_of_results = current_result; - if (aborted_due_to_size) - return total_results; + if (aborted_due_to_size) return total_results; return 0; } - - -int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_vector_creation(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16}; + const std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; + // should be in sync with global array size_names + const std::vector vecSizes = { 1, 2, 3, 4, 8, 16 }; - char *program_source; - int error; + int error = CL_SUCCESS; int total_errors = 0; + int number_of_results = 0; - cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - void *input_data_converted; - void *output_data; - - int number_of_results;; - - input_data_converted = malloc(sizeof(cl_double)*16); - program_source = (char*)malloc(sizeof(char)*1024*1024*4); + std::vector input_data_converted(sizeof(cl_double) * 16); + std::vector program_source(sizeof(char) * 1024 * 1024 * 4); + std::vector output_data; // Iterate over all the types - for (int type_index=0; type_index<10; type_index++) { - if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong))) + for (int type_index = 0; type_index < vecType.size(); type_index++) { - log_info("Long/ULong data type not supported on this device\n"); - continue; - } - clMemWrapper input; - - if (vecType[type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + if (!gHasLong + && ((vecType[type_index] == kLong) + || (vecType[type_index] == kULong))) + { + log_info("Long/ULong data type not supported on this device\n"); + continue; + } + else if (vecType[type_index] == kDouble) + { + if (!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double " + "tests.\n"); continue; } - log_info("Testing doubles.\n"); + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); } + else if (vecType[type_index] == kHalf) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + continue; + } + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + } + + log_info("Testing %s.\n", get_explicit_type_name(vecType[type_index])); // Convert the data to the right format for the test. - memset(input_data_converted, 0xff, sizeof(cl_double)*16); - if (vecType[type_index] != kDouble) { - for (int j=0; j<16; j++) { - convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j, - kInt, 0, kRoundToEven, vecType[type_index]); + memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16); + if (vecType[type_index] == kDouble) + { + const cl_double input_data_double[16] = { 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15 }; + memcpy(input_data_converted.data(), &input_data_double, + sizeof(cl_double) * 16); + } + else if (vecType[type_index] == kHalf) + { + cl_half *buf = + reinterpret_cast(input_data_converted.data()); + for (int j = 0; j < 16; j++) + buf[j] = cl_half_from_float(float(j), CL_HALF_RTE); + } + else + { + for (int j = 0; j < 16; j++) + { + convert_explicit_value( + &j, + ((char *)input_data_converted.data()) + + get_explicit_type_size(vecType[type_index]) * j, + kInt, 0, kRoundToEven, vecType[type_index]); } - } else { - memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16); } - input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16, - (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error); - if (error) { + clMemWrapper input = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + get_explicit_type_size(vecType[type_index]) * 16, + input_data_converted.data(), &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; continue; } // Iterate over all the vector sizes. - for (int size_index=1; size_index< 5; size_index++) { - size_t global[] = {1,1,1}; + for (int size_index = 1; size_index < vecSizes.size(); size_index++) + { + size_t global[] = { 1, 1, 1 }; int number_generated = -1; int previous_number_generated = 0; - log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]); - while (number_generated != 0) { + log_info("Testing %s%s...\n", + get_explicit_type_name(vecType[type_index]), + size_names[size_index]); + while (number_generated != 0) + { clMemWrapper output; clKernelWrapper kernel; clProgramWrapper program; - number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated); - if (number_generated != 0) { + number_generated = + create_kernel(vecType[type_index], vecSizes[size_index], + program_source.data(), &number_of_results, + number_generated); + if (number_generated != 0) + { if (previous_number_generated == 0) - log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0); - log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1); + log_info("Code size greater than %gkB; splitting test " + "into multiple kernels.\n", + MAX_CODE_SIZE / 1024.0); + log_info("\tExecuting vector permutations %d to %d...\n", + previous_number_generated, number_generated - 1); } - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation"); - if (error) { + char *src = program_source.data(); + error = create_single_kernel_helper(context, &program, &kernel, + 1, (const char **)&src, + "test_vector_creation"); + if (error) + { log_error("create_single_kernel_helper failed.\n"); total_errors++; break; } - output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - NULL, &error); - if (error) { + output = clCreateBuffer( + context, CL_MEM_WRITE_ONLY, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + NULL, &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; break; @@ -312,95 +391,115 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_q error = clSetKernelArg(kernel, 0, sizeof(input), &input); error |= clSetKernelArg(kernel, 1, sizeof(output), &output); - if (error) { + if (error) + { print_error(error, "clSetKernelArg failed"); total_errors++; break; } - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - if (error) { + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, + NULL, 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueNDRangeKernel failed"); total_errors++; break; } error = clFinish(queue); - if (error) { + if (error) + { print_error(error, "clFinish failed"); total_errors++; break; } - output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - if (output_data == NULL) { - log_error("Failed to allocate memory for output data.\n"); - total_errors++; - break; - } - memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - output_data, 0, NULL, NULL); - if (error) { + output_data.resize(number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + memset(output_data.data(), 0xff, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + error = clEnqueueReadBuffer( + queue, output, CL_TRUE, 0, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + output_data.data(), 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueReadBuffer failed"); total_errors++; - free(output_data); break; } // Check the results - char *res = (char *)output_data; - char *exp = (char *)input_data_converted; - for (int i=0; i