mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
407 lines
16 KiB
C++
407 lines
16 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "procs.h"
|
|
#include "../../test_common/harness/conversions.h"
|
|
#include "../../test_common/harness/typeWrappers.h"
|
|
#include "../../test_common/harness/errorHelpers.h"
|
|
|
|
|
|
|
|
|
|
#define DEBUG 0
|
|
#define DEPTH 16
|
|
// Limit the maximum code size for any given kernel.
|
|
#define MAX_CODE_SIZE (1024*32)
|
|
|
|
const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1};
|
|
const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"};
|
|
|
|
// Creates a kernel by enumerating all possible ways of building the vector out of vloads
|
|
// skip_to_results will skip results up to a given number. If the amount of code generated
|
|
// is greater than MAX_CODE_SIZE, this function will return the number of results used,
|
|
// which can then be used as the skip_to_result value to continue where it left off.
|
|
int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) {
|
|
|
|
int number_of_sizes;
|
|
|
|
switch (output_size) {
|
|
case 1:
|
|
number_of_sizes = 1;
|
|
break;
|
|
case 2:
|
|
number_of_sizes = 2;
|
|
break;
|
|
case 3:
|
|
number_of_sizes = 3;
|
|
break;
|
|
case 4:
|
|
number_of_sizes = 4;
|
|
break;
|
|
case 8:
|
|
number_of_sizes = 5;
|
|
break;
|
|
case 16:
|
|
number_of_sizes = 6;
|
|
break;
|
|
default:
|
|
log_error("Invalid size: %d\n", output_size);
|
|
return -1;
|
|
}
|
|
|
|
int total_results = 0;
|
|
int current_result = 0;
|
|
int total_vloads = 0;
|
|
int total_program_length = 0;
|
|
int aborted_due_to_size = 0;
|
|
|
|
if (skip_to_result < 0)
|
|
skip_to_result = 0;
|
|
|
|
// The line of code for the vector creation
|
|
char line[1024];
|
|
// Keep track of what size vector we are using in each position so we can iterate through all fo them
|
|
int pos[DEPTH];
|
|
int max_size = output_size;
|
|
if (DEBUG > 1) log_info("max_size: %d\n", max_size);
|
|
|
|
program[0] = '\0';
|
|
sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n",
|
|
type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
|
get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]);
|
|
total_program_length += (int)strlen(program);
|
|
|
|
char storePrefix[ 128 ], storeSuffix[ 128 ];
|
|
|
|
// Start out trying sizes 1,1,1,1,1...
|
|
for (int i=0; i<DEPTH; i++)
|
|
pos[i] = 0;
|
|
|
|
int done = 0;
|
|
while (!done) {
|
|
if (DEBUG > 1) {
|
|
log_info("pos size[] = [");
|
|
for (int k=0; k<DEPTH; k++)
|
|
log_info(" %d ", pos[k]);
|
|
log_info("]\n");
|
|
}
|
|
|
|
// Go through the selected vector sizes and see if the first n of them fit the
|
|
// required size exactly.
|
|
int size_so_far = 0;
|
|
int vloads;
|
|
for ( vloads=0; vloads<DEPTH; vloads++) {
|
|
if (size_so_far + sizes[pos[vloads]] <= max_size) {
|
|
size_so_far += sizes[pos[vloads]];
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (DEBUG > 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
|
|
|
|
// If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations
|
|
// of the sizes to the right. Prune them from the search.
|
|
if (size_so_far != max_size) {
|
|
// Zero all the sizes to the right
|
|
for (int k=vloads+1; k<DEPTH; k++) {
|
|
pos[k] = 0;
|
|
}
|
|
// Increment this current size and propagate the values up if needed
|
|
for (int d=vloads; d>=0; d--) {
|
|
pos[d]++;
|
|
if (pos[d] >= number_of_sizes) {
|
|
pos[d] = 0;
|
|
if (d == 0) {
|
|
// If we rolled over then we are done
|
|
done = 1;
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
// Go on to the next size since this one (and all others "under" it) didn't fit
|
|
continue;
|
|
}
|
|
|
|
|
|
// Generate the actual load line if we are building this part
|
|
line[0]= '\0';
|
|
if (skip_to_result == 0 || total_results >= skip_to_result) {
|
|
if( number_of_sizes == 3 )
|
|
{
|
|
sprintf( storePrefix, "vstore3( " );
|
|
sprintf( storeSuffix, ", %d, result )", current_result );
|
|
}
|
|
else
|
|
{
|
|
sprintf( storePrefix, "result[%d] = ", current_result );
|
|
storeSuffix[ 0 ] = 0;
|
|
}
|
|
|
|
sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size);
|
|
current_result++;
|
|
|
|
int offset = 0;
|
|
for (int i=0; i<vloads; i++) {
|
|
if (pos[i] == 0)
|
|
sprintf(line + strlen(line), "src[%d]", offset);
|
|
else
|
|
sprintf(line + strlen(line), "vload%s(0,src+%d)", size_names[pos[i]], offset);
|
|
offset += sizes[pos[i]];
|
|
if (i<(vloads-1))
|
|
sprintf(line + strlen(line), ",");
|
|
}
|
|
sprintf(line + strlen(line), ")%s;\n", storeSuffix);
|
|
|
|
strcat(program, line);
|
|
total_vloads += vloads;
|
|
}
|
|
total_results++;
|
|
total_program_length += (int)strlen(line);
|
|
if (total_program_length > MAX_CODE_SIZE) {
|
|
aborted_due_to_size = 1;
|
|
done = 1;
|
|
}
|
|
|
|
|
|
if (DEBUG) log_info("line is: %s", line);
|
|
|
|
// If we did not use all of them, then we ignore any changes further to the right.
|
|
// We do this by causing those loops to skip on the next iteration.
|
|
if (vloads < DEPTH) {
|
|
if (DEBUG > 1) log_info("done with this depth\n");
|
|
for (int k=vloads; k<DEPTH; k++)
|
|
pos[k] = number_of_sizes;
|
|
}
|
|
|
|
// Increment the far right size by 1, rolling over as needed
|
|
for (int d=DEPTH-1; d>=0; d--) {
|
|
pos[d]++;
|
|
if (pos[d] >= number_of_sizes) {
|
|
pos[d] = 0;
|
|
if (d == 0) {
|
|
// If we rolled over at the far-left then we are done
|
|
done = 1;
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (done)
|
|
break;
|
|
|
|
// Continue until we are done.
|
|
}
|
|
strcat(program, "}\n\n"); //log_info("%s\n", program);
|
|
total_program_length += 3;
|
|
if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n",
|
|
get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads);
|
|
*number_of_results = current_result;
|
|
if (aborted_due_to_size)
|
|
return total_results;
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
|
|
int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
|
|
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16};
|
|
|
|
char *program_source;
|
|
int error;
|
|
int total_errors = 0;
|
|
|
|
cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
|
cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
|
void *input_data_converted;
|
|
void *output_data;
|
|
|
|
int number_of_results;;
|
|
|
|
input_data_converted = malloc(sizeof(cl_double)*16);
|
|
program_source = (char*)malloc(sizeof(char)*1024*1024*4);
|
|
|
|
// Iterate over all the types
|
|
for (int type_index=0; type_index<10; type_index++) {
|
|
if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong)))
|
|
{
|
|
log_info("Long/ULong data type not supported on this device\n");
|
|
continue;
|
|
}
|
|
|
|
clMemWrapper input;
|
|
|
|
if (vecType[type_index] == kDouble) {
|
|
if (!is_extension_available(deviceID, "cl_khr_fp64")) {
|
|
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
|
|
continue;
|
|
}
|
|
log_info("Testing doubles.\n");
|
|
}
|
|
|
|
// Convert the data to the right format for the test.
|
|
memset(input_data_converted, 0xff, sizeof(cl_double)*16);
|
|
if (vecType[type_index] != kDouble) {
|
|
for (int j=0; j<16; j++) {
|
|
convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j,
|
|
kInt, 0, kRoundToEven, vecType[type_index]);
|
|
}
|
|
} else {
|
|
memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16);
|
|
}
|
|
|
|
input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16,
|
|
(vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error);
|
|
if (error) {
|
|
print_error(error, "clCreateBuffer failed");
|
|
total_errors++;
|
|
continue;
|
|
}
|
|
|
|
// Iterate over all the vector sizes.
|
|
for (int size_index=1; size_index< 5; size_index++) {
|
|
size_t global[] = {1,1,1};
|
|
int number_generated = -1;
|
|
int previous_number_generated = 0;
|
|
|
|
log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]);
|
|
while (number_generated != 0) {
|
|
clMemWrapper output;
|
|
clKernelWrapper kernel;
|
|
clProgramWrapper program;
|
|
|
|
number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated);
|
|
if (number_generated != 0) {
|
|
if (previous_number_generated == 0)
|
|
log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0);
|
|
log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1);
|
|
}
|
|
|
|
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation");
|
|
if (error) {
|
|
log_error("create_single_kernel_helper failed.\n");
|
|
total_errors++;
|
|
break;
|
|
}
|
|
|
|
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
|
number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
|
|
NULL, &error);
|
|
if (error) {
|
|
print_error(error, "clCreateBuffer failed");
|
|
total_errors++;
|
|
break;
|
|
}
|
|
|
|
error = clSetKernelArg(kernel, 0, sizeof(input), &input);
|
|
error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
|
|
if (error) {
|
|
print_error(error, "clSetKernelArg failed");
|
|
total_errors++;
|
|
break;
|
|
}
|
|
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
|
|
if (error) {
|
|
print_error(error, "clEnqueueNDRangeKernel failed");
|
|
total_errors++;
|
|
break;
|
|
}
|
|
|
|
error = clFinish(queue);
|
|
if (error) {
|
|
print_error(error, "clFinish failed");
|
|
total_errors++;
|
|
break;
|
|
}
|
|
|
|
output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
|
|
if (output_data == NULL) {
|
|
log_error("Failed to allocate memory for output data.\n");
|
|
total_errors++;
|
|
break;
|
|
}
|
|
memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
|
|
error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,
|
|
number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
|
|
output_data, 0, NULL, NULL);
|
|
if (error) {
|
|
print_error(error, "clEnqueueReadBuffer failed");
|
|
total_errors++;
|
|
free(output_data);
|
|
break;
|
|
}
|
|
|
|
// Check the results
|
|
char *res = (char *)output_data;
|
|
char *exp = (char *)input_data_converted;
|
|
for (int i=0; i<number_of_results; i++) {
|
|
// If they do not match, then print out why
|
|
if (memcmp(input_data_converted,
|
|
res + i*(get_explicit_type_size(vecType[type_index])*vecSizes[size_index]),
|
|
get_explicit_type_size(vecType[type_index])*vecSizes[size_index])
|
|
) {
|
|
log_error("Data failed to validate for result %d\n", i);
|
|
|
|
// Find the line in the program that failed. This is ugly.
|
|
char search[32];
|
|
char found_line[1024];
|
|
found_line[0]='\0';
|
|
search[0]='\0';
|
|
sprintf(search, "result[%d] = (", i);
|
|
char *start_loc = strstr(program_source, search);
|
|
if (start_loc == NULL)
|
|
log_error("Failed to find program source for failure for %s in \n%s", search, program_source);
|
|
else {
|
|
char *end_loc = strstr(start_loc, "\n");
|
|
memcpy(&found_line, start_loc, (end_loc-start_loc));
|
|
found_line[end_loc-start_loc]='\0';
|
|
log_error("Failed vector line: %s\n", found_line);
|
|
}
|
|
|
|
for (int j=0; j<(int)vecSizes[size_index]; j++) {
|
|
char expected_value[64];
|
|
char returned_value[64];
|
|
expected_value[0]='\0';
|
|
returned_value[0]='\0';
|
|
print_type_to_string(vecType[type_index], (void*)(res+get_explicit_type_size(vecType[type_index])*(i*vecSizes[size_index]+j)), returned_value);
|
|
print_type_to_string(vecType[type_index], (void*)(exp+get_explicit_type_size(vecType[type_index])*j), expected_value);
|
|
log_error("index [%d, component %d]: got: %s expected: %s\n", i, j,
|
|
returned_value, expected_value);
|
|
}
|
|
|
|
total_errors++;
|
|
}
|
|
}
|
|
free(output_data);
|
|
previous_number_generated = number_generated;
|
|
} // number_generated != 0
|
|
|
|
} // vector sizes
|
|
} // vector types
|
|
|
|
free(input_data_converted);
|
|
free(program_source);
|
|
|
|
return total_errors;
|
|
}
|
|
|
|
|