From c4b16940e4c541da70210eb575e0ec4afb8b9a0f Mon Sep 17 00:00:00 2001 From: Neoming <33822653+neoming@users.noreply.github.com> Date: Tue, 8 Jul 2025 07:34:05 +0800 Subject: [PATCH 01/61] test_subgroups: Update local workgroup size to generate desired input and verify result (#2382) The `local_workgroup_size` is changed by `get_max_common_work_group_size`. But the input data still uses the original `local_workgroup_size`, which will cause the check logic to fail. --- test_conformance/subgroups/subhelpers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index ab8ee797..f234eef4 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1609,6 +1609,7 @@ template struct subgroup_test // Generate the desired input for the kernel test_params.subgroup_size = subgroup_size; + test_params.local_workgroup_size = local; Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params); test_status status = TEST_FAIL; From 3065a62f77a756d7c12ca94c46280e67e31413d6 Mon Sep 17 00:00:00 2001 From: Oskar Hubert Weber Date: Tue, 8 Jul 2025 01:34:44 +0200 Subject: [PATCH 02/61] Fix memory leaks in test_profiling execute_multipass (#2390) - Fixed leaks caused by missing clRelease calls - Improved error handling readability Signed-off-by: Oskar Hubert Weber [oskar.hubert.weber@intel.com](mailto:oskar.hubert.weber@intel.com) --------- Signed-off-by: Oskar Hubert Weber --- .../profiling/execute_multipass.cpp | 154 +++++------------- 1 file changed, 39 insertions(+), 115 deletions(-) diff --git a/test_conformance/profiling/execute_multipass.cpp b/test_conformance/profiling/execute_multipass.cpp index d3532ceb..7d654ca5 100644 --- a/test_conformance/profiling/execute_multipass.cpp +++ b/test_conformance/profiling/execute_multipass.cpp @@ -24,6 +24,7 @@ #include "procs.h" #include "harness/testHarness.h" +#include "harness/typeWrappers.h" #include
"harness/errorHelpers.h" static const char *read3d_kernel_code = @@ -90,11 +91,11 @@ static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr ) { - cl_program program[1]; - cl_kernel kernel[1]; - cl_mem memobjs[2]; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper memobjs[2]; cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; - cl_event executeEvent = NULL; + clEventWrapper executeEvent = NULL; cl_ulong queueStart, submitStart, writeStart, writeEnd; size_t threads[3]; size_t localThreads[3]; @@ -108,18 +109,11 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t), (size_t *)localThreads, NULL); - if (err) - { - log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed\n"); - return -1; - } + test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed"); err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxWorkgroupSize, NULL); - if (err) - { - log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed\n"); - return -1; - } + test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + localThreads[0] = std::min({ localThreads[0], threads[0], maxWorkgroupSize }); localThreads[1] = std::min( @@ -128,121 +122,65 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue std::min({ localThreads[2], threads[2], maxWorkgroupSize / (localThreads[0] * localThreads[1]) }); - cl_sampler sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err ); - if( err ){ - log_error( " clCreateSampler failed.\n" ); - return -1; - } + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, 
&err); + test_error(err, "clCreateSampler failed"); // allocate the input and output image memory objects memobjs[0] = create_image_3d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &image_format_desc, w, h, d, 0, 0, inptr, &err); - if( memobjs[0] == (cl_mem)0 ){ - log_error( " unable to create 2D image using create_image_2d\n" ); - return -1; - } + test_error(err, "unable to create 3D image using create_image_3d"); // allocate an array memory object to load the filter weights size_t outptr_size = sizeof(cl_uchar) * w * h * d * nChannels; memobjs[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, outptr_size, NULL, &err); - if( memobjs[1] == (cl_mem)0 ){ - log_error( " unable to create array using clCreateBuffer\n" ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + test_error(err, "unable to create array using clCreateBuffer"); // create the compute program - err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &read3d_kernel_code, "read3d" ); - if( err ){ - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } - + err = create_single_kernel_helper(context, &program, &kernel, 1, + &read3d_kernel_code, "read3d"); + test_error(err, "create_single_kernel_helper failed"); // create kernel args object and set arg values. 
// set the args values - err |= clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] ); - err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] ); - err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); + err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]); + err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobjs[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + test_error(err, "clSetKernelArg failed"); - if( err != CL_SUCCESS ){ - print_error( err, "clSetKernelArg failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } - - err = clEnqueueNDRangeKernel( queue, kernel[0], 3, NULL, threads, localThreads, 0, NULL, &executeEvent ); - - if( err != CL_SUCCESS ){ - print_error( err, "clEnqueueNDRangeKernel failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, localThreads, + 0, NULL, &executeEvent); + test_error(err, "clEnqueueNDRangeKernel failed"); if (executeEvent) { // This synchronization point is needed in order to assume the data is valid. // Getting profiling information is not a synchronization point. 
err = clWaitForEvents( 1, &executeEvent ); - if( err != CL_SUCCESS ) - { - print_error( err, "clWaitForEvents failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + test_error(err, "clWaitForEvents failed"); // test profiling - while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + while ((err = clGetEventProfilingInfo( + executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), + &queueStart, NULL)) + == CL_PROFILING_INFO_NOT_AVAILABLE) + ; + test_error(err, "clGetEventProfilingInfo failed"); - while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + while ((err = clGetEventProfilingInfo( + executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), + &submitStart, NULL)) + == CL_PROFILING_INFO_NOT_AVAILABLE) + ; + test_error(err, "clGetEventProfilingInfo failed"); err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + test_error(err, "clGetEventProfilingInfo failed"); err 
= clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + test_error(err, "clGetEventProfilingInfo failed"); log_info( "Profiling info:\n" ); log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) / 1000000000000.f ); @@ -252,23 +190,9 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue // read output image err = clEnqueueReadBuffer(queue, memobjs[1], CL_TRUE, 0, outptr_size, outptr, 0, NULL, NULL); - if( err != CL_SUCCESS ){ - print_error( err, "clReadImage failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } - - // release kernel, program, and memory objects - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); + test_error(err, "clReadImage failed"); return err; - } // end run_kernel() From 5997a00b2f048670c52b07830c72bad6be546cab Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 8 Jul 2025 00:35:20 +0100 Subject: [PATCH 03/61] Test releasing a command-buffer after submission but before execution has finished (#2414) Add cl_khr_command_buffer test that it is valid to release a command-buffer after it has been enqueued but before execution is finished.
This stresses the semantics from [clReleaseCommandBufferKHR](https://registry.khronos.org/OpenCL/sdk/3.0/docs/man/html/clReleaseCommandBufferKHR.html#_description) that: "After the command_buffer reference count becomes zero **and has finished execution**, the command-buffer is deleted" --- .../basic_command_buffer.cpp | 37 +++++++++++++++++++ .../basic_command_buffer.h | 9 +++++ .../basic_command_buffer_tests.cpp | 6 +++ 3 files changed, 52 insertions(+) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp index 43926b84..9c3a402b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -435,3 +435,40 @@ bool InterleavedEnqueueTest::Skip() { return BasicCommandBufferTest::Skip() || !simultaneous_use_support; } + +cl_int EnqueueAndReleaseTest::Run() +{ + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr, + nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Calls release on cl_command_buffer_khr handle inside wrapper class, and + // sets the handle to nullptr, so that release doesn't get called again at + // end of test when wrapper object is destroyed. 
+ command_buffer.reset(); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h index 7ad7d28d..241a08c5 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h @@ -128,6 +128,15 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest bool Skip() override; }; +// Test releasing a command-buffer after it has been submitted for execution, +// but before the user has waited on completion of the enqueue. +struct EnqueueAndReleaseTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override; +}; + template int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp index 0f95372a..69d554c4 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp @@ -44,3 +44,9 @@ REGISTER_TEST(explicit_flush) return MakeAndRunTest(device, context, queue, num_elements); } + +REGISTER_TEST(enqueue_and_release) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} From 09f43ca9160349d8cac8c865f1e24006ec0b63d0 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Tue, 8 Jul 2025 17:59:08 +0100 Subject: [PATCH 04/61] 
Avoid some undefined behavior in test_bruteforce. (#2400) * Ulp_Error*: ilogb(reference) - 1 may overflow if reference is zero. * binary_i_double Test: DoubleFromUInt32's result is a cl_double and the attempt is to store it as a cl_double, but p was defined as a pointer to cl_ulong, resulting in an unintended implicit conversion that is not valid for out-of-range doubles. * exp2, tanpi: ensure early exit for NaN. * shift_right_sticky_128: avoid out-of-range shift if shift value is exactly 64. * scalbn: e += n may overflow if n is large, move it after the check for large n. --- test_common/harness/errorHelpers.cpp | 9 +++------ .../math_brute_force/binary_i_double.cpp | 5 ++--- test_conformance/math_brute_force/main.cpp | 3 +-- .../math_brute_force/reference_math.cpp | 15 +++++++++------ 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index fe65f0cc..b367555a 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -387,8 +387,7 @@ static float Ulp_Error_Half_Float(float test, double reference) } // reference is a normal power of two or a zero - int ulp_exp = - HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1); + int ulp_exp = HALF_MANT_DIG - std::max(ilogb(reference), HALF_MIN_EXP); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -469,8 +468,7 @@ float Ulp_Error(float test, double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = - FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1); + int ulp_exp = FLT_MANT_DIG - std::max(ilogb(reference), FLT_MIN_EXP); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -553,8 +551,7 @@ float Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of 
the ulp unit place - int ulp_exp = - DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP); // Scale the exponent of the error float result = (float)scalbnl(testVal - reference, ulp_exp); diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 4428b422..d8c8ad5c 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -248,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } // Init input array - cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; + cl_double *p = (cl_double *)gIn + thread_id * buffer_elements; cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements; size_t idx = 0; int totalSpecialValueCount = specialValuesCount * specialValuesIntCount; @@ -257,7 +257,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Test edge cases if (job_id <= (cl_uint)lastSpecialJobIndex) { - cl_double *fp = (cl_double *)p; cl_int *ip2 = (cl_int *)p2; uint32_t x, y; @@ -266,7 +265,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) for (; idx < buffer_elements; idx++) { - fp[idx] = specialValues[x]; + p[idx] = specialValues[x]; ip2[idx] = specialValuesInt[y]; if (++x >= specialValuesCount) { diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index f0f2a4b6..6b72f326 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1330,8 +1330,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = - DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP); // allow correctly rounded 
results to pass through unmolested. (We might add // error to it below.) There is something of a performance optimization here diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 4d312c1e..45dd6526 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -721,9 +721,9 @@ double reference_tanpi(double x) double z = reference_fabs(x); // if big and even -- caution: only works if x only has single precision - if (z >= HEX_DBL(+, 1, 0, +, 24)) + if (!(z < HEX_DBL(+, 1, 0, +, 24))) { - if (z == INFINITY) return x - x; // nan + if (!isfinite(z)) return x - x; // nan return reference_copysign( 0.0, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. @@ -1223,6 +1223,8 @@ double reference_relaxed_exp2(double x) { return reference_exp2(x); } double reference_exp2(double x) { // Note: only suitable for verifying single precision. Doesn't have range of a // full double exp2 implementation. + if (isnan(x)) return x; + if (x == 0.0) return 1.0; // separate x into fractional and integer parts @@ -2781,7 +2783,7 @@ static inline void shift_right_sticky_128(cl_ulong *hi, cl_ulong *lo, int shift) sticky |= (0 != l); l = 0; } - else + else if (shift > 0) { sticky |= (0 != (l << (64 - shift))); l >>= shift; @@ -3088,9 +3090,9 @@ long double reference_tanpil(long double x) long double z = reference_fabsl(x); // if big and even -- caution: only works if x only has single precision - if (z >= HEX_LDBL(+, 1, 0, +, 53)) + if (!(z < HEX_LDBL(+, 1, 0, +, 53))) { - if (z == INFINITY) return x - x; // nan + if (!isfinite(z)) return x - x; // nan return reference_copysignl( 0.0L, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. 
@@ -5027,8 +5029,9 @@ static double reference_scalbn(double x, int n) u.d -= 1.0; e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022; } + if (n >= 2098) return reference_copysign(INFINITY, x); e += n; - if (e >= 2047 || n >= 2098) return reference_copysign(INFINITY, x); + if (e >= 2047) return reference_copysign(INFINITY, x); if (e < -51 || n < -2097) return reference_copysign(0.0, x); if (e <= 0) { From 08738a6954024dbf276288b4f81be9312ea890f6 Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 8 Jul 2025 17:59:47 +0100 Subject: [PATCH 05/61] Add cl_khr_unified_svm to the list of known extensions in the compiler defines for extension test. (#2433) Adds cl_khr_unified_svm to the list of known extensions in the compiler defines for extension test. --- .../compiler/test_compiler_defines_for_extensions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index baede608..067ee8ed 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -95,7 +95,8 @@ const char *known_extensions[] = { "cl_khr_command_buffer", "cl_khr_command_buffer_mutable_dispatch", "cl_khr_command_buffer_multi_device", - "cl_khr_external_memory_android_hardware_buffer" + "cl_khr_external_memory_android_hardware_buffer", + "cl_khr_unified_svm" }; // clang-format on From 933874f07022fefa0d476516cfa9be54851bc204 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 15 Jul 2025 09:00:20 -0700 Subject: [PATCH 06/61] allow specifying CL_DEVICE_TYPE_ALL as the harness device type (#2421) Currently, selecting a different device in a platform to test is rather cumbersome, for two reasons: 1. The default device type tested is the "default" device and there is at most one default device in a platform. 
This means that, by itself, choosing any non-zero device index is by definition out-of-range: ```sh $ CL_PLATFORM_INDEX=1 CL_DEVICE_INDEX=1 ./test_conformance/basic/test_basic Initializing random seed to 0. Requesting Default device based on command line for platform index 1 and device index 1 device index out of range -- choosen_device_index (1) >= num_devices (1) ``` 2. To choose a non-default device type you therefore need to explicitly specify another device type also, but "all" is not a valid device type in the harness. This means that you need to know both the device type and the index of the device within that device type to choose the device to test. ```sh $ CL_DEVICE_TYPE=all CL_PLATFORM_INDEX=1 CL_DEVICE_INDEX=1 ./test_conformance/basic/test_basic Unknown CL_DEVICE_TYPE env variable setting: all. Aborting... Aborted (core dumped) ``` This PR aims to fix (2), by allowing "all" as a device type. In the future, we could consider making the default device type "all" vs. "default", which would fix (1) also, but that will likely need more discussion and should be done in a separate PR. 
--- test_common/harness/testHarness.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index df54a35d..611d0b32 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -197,8 +197,11 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, if (env_mode != NULL) { based_on_env_var = 1; - if (strcmp(env_mode, "gpu") == 0 - || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0) + if (strcmp(env_mode, "all") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_ALL") == 0) + device_type = CL_DEVICE_TYPE_ALL; + else if (strcmp(env_mode, "gpu") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0) device_type = CL_DEVICE_TYPE_GPU; else if (strcmp(env_mode, "cpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0) @@ -271,7 +274,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, "(default 0).\n"); log_info("\tid\t\tIndicates device at index should be used " "(default 0).\n"); - log_info("\t\tcpu|gpu|accelerator| " + log_info("\t\tall|cpu|gpu|accelerator| " "(default CL_DEVICE_TYPE_DEFAULT)\n"); log_info("\n"); log_info("\tNOTE: You may pass environment variable " @@ -320,8 +323,14 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, /* Do we have a CPU/GPU specification? 
*/ if (argc > 1) { - if (strcmp(argv[argc - 1], "gpu") == 0 - || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0) + if (strcmp(argv[argc - 1], "all") == 0 + || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_ALL") == 0) + { + device_type = CL_DEVICE_TYPE_ALL; + argc--; + } + else if (strcmp(argv[argc - 1], "gpu") == 0 + || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0) { device_type = CL_DEVICE_TYPE_GPU; argc--; @@ -376,6 +385,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, switch (device_type) { + case CL_DEVICE_TYPE_ALL: log_info("Requesting any device "); break; case CL_DEVICE_TYPE_GPU: log_info("Requesting GPU device "); break; case CL_DEVICE_TYPE_CPU: log_info("Requesting CPU device "); break; case CL_DEVICE_TYPE_ACCELERATOR: From 8d4a8700597dccbd81e9292620269854196b8a81 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 15 Jul 2025 09:01:19 -0700 Subject: [PATCH 07/61] fix correctly rounded behavior for math bruteforce tests (#2397) fixes #2387 Corrects the "correctly rounded" behavior for the math bruteforce tests. Specifically: * Only applies the `-cl-fp32-correctly-rounded-divide-sqrt` build option for the `divide_cr` and `sqrt_cr` tests. The other tests do not receive this build option. This means that there is a difference in the behavior of the `divide` and `divide_cr` tests and the `sqrt` and `sqrt_cr` tests, and the "correctly rounded" build option is not applied to the fp16 or fp64 tests. * Removes the build option to toggle testing the correctly rounded divide and square root tests since it is no longer needed. Instead, the test names can be used to choose whether to test the correctly rounded functions or the non-correctly rounded functions. Additionally: * Relaxes the fp16 sqrt accuracy requirements to 1 ULP. This is needed to pass this test on some of our devices. This part is still under discussion, so I will keep this PR as a draft until it is settled.
--- .../math_brute_force/binary_operator_float.cpp | 6 ++++-- test_conformance/math_brute_force/common.cpp | 16 ++++++++-------- test_conformance/math_brute_force/common.h | 3 +++ .../math_brute_force/function_list.cpp | 4 ++-- test_conformance/math_brute_force/main.cpp | 12 ------------ .../math_brute_force/unary_float.cpp | 6 ++++-- 6 files changed, 21 insertions(+), 26 deletions(-) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index cce6e122..17eb998f 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } + bool correctlyRounded = strcmp(f->name, "divide_cr") == 0; + // Init the kernels BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; + test_info.programs, f->nameInCode, + relaxedMode, correctlyRounded }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp index df45a700..257e2595 100644 --- a/test_conformance/math_brute_force/common.cpp +++ b/test_conformance/math_brute_force/common.cpp @@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel, if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; } -std::string GetBuildOptions(bool relaxed_mode) +std::string GetBuildOptions(const BuildKernelInfo &info) { std::ostringstream options; @@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode) options << " -cl-denorms-are-zero"; } - if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) - { - options << " -cl-fp32-correctly-rounded-divide-sqrt"; - } - - if (relaxed_mode) + if 
(info.relaxedMode) { options << " -cl-fast-relaxed-math"; } + if (info.correctlyRounded) + { + options << " -cl-fp32-correctly-rounded-divide-sqrt"; + } + return options.str(); } @@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id, // Create the program. clProgramWrapper &program = info.programs[vector_size_index]; - auto options = GetBuildOptions(info.relaxedMode); + auto options = GetBuildOptions(info); int error = create_single_kernel_helper(gContext, &program, nullptr, sources.size(), sources.data(), nullptr, options.c_str()); diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 3f89ef6c..d7e70a71 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -84,6 +84,9 @@ struct BuildKernelInfo // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; + + // Whether to build with -cl-fp32-correctly-rounded-divide-sqrt. + bool correctlyRounded; }; // Data common to all math tests. diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 90731ea0..408a394a 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -375,8 +375,8 @@ const Func functionList[] = { { NULL }, 3.0f, 0.0f, - 0.0f, - 1.0f, + 1.5f, + 1.5f, 4.0f, INFINITY, INFINITY, diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 6b72f326..008ab307 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -82,7 +82,6 @@ static int gTestFastRelaxed = 1; OpenCL 2.0 spec then it has to be changed through a command line argument. 
*/ int gFastRelaxedDerived = 1; -static int gToggleCorrectlyRoundedDivideSqrt = 0; int gHasHalf = 0; cl_device_fp_config gHalfCapabilities = 0; int gDeviceILogb0 = 1; @@ -469,8 +468,6 @@ static int ParseArgs(int argc, const char **argv) optionFound = 1; switch (*arg) { - case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break; - case 'd': gHasDouble ^= 1; break; case 'e': gFastRelaxedDerived ^= 1; break; @@ -629,8 +626,6 @@ static void PrintUsage(void) { vlog("%s [-cglsz]: \n", appName); vlog("\toptions:\n"); - vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: " - "off)\n"); vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 " "on)\n"); vlog("\t\t-f\tToggle float precision testing. (Default: on)\n"); @@ -942,13 +937,6 @@ test_status InitCL(cl_device_id device) vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); - if (gToggleCorrectlyRoundedDivideSqrt) - { - gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; - } - vlog("\tTesting with correctly rounded float divide and sqrt? %s\n", - no_yes[0 - != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]); vlog("\tTesting single precision? 
%s\n", no_yes[0 != gTestFloat]); diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index ee8a61b8..2761ab97 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -563,10 +563,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) INFINITY; // out of range resut from finite inputs must be numeric } + bool correctlyRounded = strcmp(f->name, "sqrt_cr") == 0; + // Init the kernels BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; + test_info.programs, f->nameInCode, + relaxedMode, correctlyRounded }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) From 2fdefbdf34b3d1681b064b4176e192e0b734c269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 16 Jul 2025 00:33:37 +0100 Subject: [PATCH 08/61] test_vulkan: fix build flags and warnings (#2443) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Do not override parent-provided value for CMAKE_CXX_FLAGS - Stop building with -fpermissive (not required/bad practice) - Delete unused variables - Remove unnecessary casts Signed-off-by: Kévin Petit --- test_conformance/vulkan/CMakeLists.txt | 1 - .../vulkan/test_vulkan_interop_buffer.cpp | 64 ++++++------------- .../vulkan/test_vulkan_interop_image.cpp | 38 ++++------- 3 files changed, 34 insertions(+), 69 deletions(-) diff --git a/test_conformance/vulkan/CMakeLists.txt b/test_conformance/vulkan/CMakeLists.txt index e658b31c..c057e84c 100644 --- a/test_conformance/vulkan/CMakeLists.txt +++ b/test_conformance/vulkan/CMakeLists.txt @@ -2,7 +2,6 @@ set (MODULE_NAME VULKAN) list(APPEND CLConform_LIBRARIES vulkan_wrapper) set(CMAKE_COMPILE_WARNING_AS_ERROR OFF) -set(CMAKE_CXX_FLAGS "-fpermissive") if(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-DVK_USE_PLATFORM_WIN32_KHR") endif(WIN32) diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 5f7c7f48..b90514c3 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -34,7 +34,6 @@ namespace { -cl_uchar uuid[CL_UUID_SIZE_KHR]; cl_device_id deviceId = nullptr; struct Params @@ -86,10 +85,10 @@ const char *kernel_text_verify = " \ int run_test_with_two_queue( - cl_context &context, cl_command_queue &cmd_queue1, - cl_command_queue &cmd_queue2, cl_kernel *kernel, cl_kernel &verify_kernel, - VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, - bool use_fence, + cl_context context, cl_command_queue cmd_queue1, + cl_command_queue cmd_queue2, clKernelWrapper *kernel, + cl_kernel verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + uint32_t bufferSize, bool use_fence, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { int err = CL_SUCCESS; @@ -211,7 +210,6 @@ int run_test_with_two_queue( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t buffer_size = vkBufferList[bIdx].getSize(); vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); @@ -430,8 +428,8 @@ CLEANUP: } int run_test_with_one_queue( - cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, - cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + cl_context context, cl_command_queue cmd_queue1, clKernelWrapper *kernel, + cl_kernel verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType, bool use_fence) @@ -545,7 +543,6 @@ int run_test_with_one_queue( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t 
buffer_size = vkBufferList[bIdx].getSize(); vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); @@ -735,8 +732,8 @@ CLEANUP: } int run_test_with_multi_import_same_ctx( - cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, - cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + cl_context context, cl_command_queue cmd_queue1, clKernelWrapper *kernel, + cl_kernel verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, bool use_fence, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { @@ -858,7 +855,6 @@ int run_test_with_multi_import_same_ctx( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t buffer_size = vkBufferList[bIdx].getSize(); vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) @@ -1068,10 +1064,11 @@ CLEANUP: } int run_test_with_multi_import_diff_ctx( - cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1, - cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2, - cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize, bool use_fence, + cl_context context, cl_context context2, cl_command_queue cmd_queue1, + cl_command_queue cmd_queue2, clKernelWrapper *kernel1, + clKernelWrapper *kernel2, cl_kernel verify_kernel, cl_kernel verify_kernel2, + VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, + bool use_fence, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { size_t global_work_size[1]; @@ -1087,7 +1084,6 @@ int run_test_with_multi_import_diff_ctx( clExternalExportableSemaphore *clCl2VkExternalSemaphore2 = nullptr; int err = CL_SUCCESS; int calc_max_iter; - bool withOffset; uint32_t pBufferSize; const std::vector @@ -1180,7 +1176,6 @@ int 
run_test_with_multi_import_diff_ctx( pBufferSize = bufferSize; VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize, vkExternalMemoryHandleType); - uint32_t interBufferOffset = (uint32_t)(vkBufferList[0].getSize()); for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) { @@ -1212,7 +1207,6 @@ int run_test_with_multi_import_diff_ctx( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t buffer_size = vkBufferList[bIdx].getSize(); vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) @@ -1597,14 +1591,7 @@ struct BufferTestBase : public VulkanTestBase int test_buffer_common(bool use_fence) { - int current_device = 0; - int device_count = 0; - int devices_prohibited = 0; cl_int errNum = CL_SUCCESS; - size_t extensionSize = 0; - const size_t bufsize = BUFFERSIZE; - char buf[BUFFERSIZE]; - char *extensions = NULL; clKernelWrapper verify_kernel; clKernelWrapper verify_kernel2; clKernelWrapper kernel[3] = { NULL, NULL, NULL }; @@ -1624,7 +1611,6 @@ struct BufferTestBase : public VulkanTestBase uint32_t numBuffersList[] = { 1, 2, 4 }; uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 }; - uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 }; std::vector supportedSemaphoreTypes; @@ -1740,36 +1726,28 @@ struct BufferTestBase : public VulkanTestBase if (multiImport && !multiCtx) { errNum = run_test_with_multi_import_same_ctx( - context, (cl_command_queue &)cmd_queue1, - (cl_kernel *)&kernel, (cl_kernel &)verify_kernel, + context, cmd_queue1, kernel, verify_kernel, *vkDevice, numBuffers, bufferSize, use_fence, semaphoreType); } else if (multiImport && multiCtx) { errNum = run_test_with_multi_import_diff_ctx( - context, (cl_context &)context2, - (cl_command_queue &)cmd_queue1, - (cl_command_queue &)cmd_queue3, - (cl_kernel *)&kernel, (cl_kernel *)&kernel2, - (cl_kernel &)verify_kernel, verify_kernel2, - *vkDevice, numBuffers, 
bufferSize, use_fence, - semaphoreType); + context, context2, cmd_queue1, cmd_queue3, kernel, + kernel2, verify_kernel, verify_kernel2, *vkDevice, + numBuffers, bufferSize, use_fence, semaphoreType); } else if (numCQ == 2) { errNum = run_test_with_two_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_command_queue &)cmd_queue2, - (cl_kernel *)&kernel, (cl_kernel &)verify_kernel, - *vkDevice, numBuffers + 1, bufferSize, use_fence, - semaphoreType); + context, cmd_queue1, cmd_queue2, kernel, + verify_kernel, *vkDevice, numBuffers + 1, + bufferSize, use_fence, semaphoreType); } else { errNum = run_test_with_one_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_kernel *)&kernel, (cl_kernel &)verify_kernel, + context, cmd_queue1, kernel, verify_kernel, *vkDevice, numBuffers, bufferSize, semaphoreType, use_fence); } diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index a73dde3f..6969514f 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -55,7 +55,6 @@ struct Params uint32_t numImage2DDescriptors; }; -cl_uchar uuid[CL_UUID_SIZE_KHR]; cl_device_id deviceId = NULL; size_t max_width = MAX_2D_IMAGE_WIDTH; size_t max_height = MAX_2D_IMAGE_HEIGHT; @@ -195,9 +194,10 @@ const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float, } int run_test_with_two_queue( - cl_context &context, cl_command_queue &cmd_queue1, - cl_command_queue &cmd_queue2, cl_kernel *kernel_unsigned, - cl_kernel *kernel_signed, cl_kernel *kernel_float, VulkanDevice &vkDevice, + cl_context context, cl_command_queue cmd_queue1, + cl_command_queue cmd_queue2, clKernelWrapper *kernel_unsigned, + clKernelWrapper *kernel_signed, clKernelWrapper *kernel_float, + VulkanDevice &vkDevice, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { cl_int err = CL_SUCCESS; @@ -408,7 +408,6 @@ int run_test_with_two_queue( } size_t 
totalImageMemSize = 0; - uint64_t interImageOffset = 0; { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, @@ -732,9 +731,6 @@ int run_test_with_two_queue( "Failed to signal CL semaphore\n"); } - unsigned int flags = 0; - size_t mipmapLevelOffset = 0; - cl_event eventReadImage = NULL; clFinish(cmd_queue2); for (int i = 0; i < num2DImages; i++) { @@ -817,9 +813,9 @@ CLEANUP: } int run_test_with_one_queue( - cl_context &context, cl_command_queue &cmd_queue1, - cl_kernel *kernel_unsigned, cl_kernel *kernel_signed, - cl_kernel *kernel_float, VulkanDevice &vkDevice, + cl_context context, cl_command_queue cmd_queue1, + clKernelWrapper *kernel_unsigned, clKernelWrapper *kernel_signed, + clKernelWrapper *kernel_float, VulkanDevice &vkDevice, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { cl_int err = CL_SUCCESS; @@ -1027,7 +1023,6 @@ int run_test_with_one_queue( } } size_t totalImageMemSize = 0; - uint64_t interImageOffset = 0; { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, @@ -1289,9 +1284,6 @@ int run_test_with_one_queue( "Failed to signal CL semaphore\n"); } - unsigned int flags = 0; - size_t mipmapLevelOffset = 0; - cl_event eventReadImage = NULL; for (int i = 0; i < num2DImages; i++) { err = clEnqueueReadImage( @@ -1508,20 +1500,16 @@ struct ImageCommonTest : public VulkanTestBase { if (numCQ == 2) { - err = run_test_with_two_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_command_queue &)cmd_queue2, - (cl_kernel *)kernel_unsigned, (cl_kernel *)kernel_signed, - (cl_kernel *)kernel_float, *vkDevice, - externalSemaphoreType); + err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2, + kernel_unsigned, kernel_signed, + kernel_float, *vkDevice, + externalSemaphoreType); } else { err = run_test_with_one_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_kernel *)kernel_unsigned, (cl_kernel *)kernel_signed, - (cl_kernel *)kernel_float, *vkDevice, - externalSemaphoreType); + context, cmd_queue1, 
kernel_unsigned, kernel_signed, + kernel_float, *vkDevice, externalSemaphoreType); } test_error(err, "func_name failed \n"); } From d918b4165763a55d831a37efc8aea35eb5a09fdc Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 16 Jul 2025 01:36:49 +0200 Subject: [PATCH 09/61] clean unused variables (#2446) Do not remove calls to `sample_image_pixel_float_offset` as it is using the `verbose` mode. --- .../images/kernel_read_write/test_common.cpp | 103 ++++++++---------- .../kernel_read_write/test_iterations.cpp | 2 +- 2 files changed, 49 insertions(+), 56 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 245de3ac..26baed75 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -878,18 +878,16 @@ int test_read_image(cl_context context, cl_command_queue queue, numTries, numClamped, true, lod); log_error("Step by step:\n"); - FloatPixel temp = - sample_image_pixel_float_offset( - imagePtr, imageInfo, - xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], - norm_offset_x, - norm_offset_y, - norm_offset_z, - imageSampler, tempOut, - 1 /*verbose*/, - &hasDenormals, lod); + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, imageSampler, + tempOut, 1 /*verbose*/, + &hasDenormals, lod); log_error( "\tulps: %2.2f (max " "allowed: %2.2f)\n\n", @@ -931,9 +929,6 @@ int test_read_image(cl_context context, cl_command_queue queue, // Validate float results float *resultPtr = (float *)(char *)resultValues; float expected[4], error = 0.0f; - float maxErr = get_max_relative_error( - imageInfo->format, imageSampler, image_type_3D, - CL_FILTER_LINEAR == imageSampler->filter_mode); for (size_t z = 0, j = 0; z < depth_lod; z++) { @@ -1242,26 +1237,25 @@ int test_read_image(cl_context 
context, cl_command_queue queue, j, numTries, numClamped, true, lod); log_error("Step by step:\n"); - FloatPixel temp = - sample_image_pixel_float_offset( - imagePtr, imageInfo, - xOffsetValues[j], - (num_dimensions > 1) - ? yOffsetValues[j] - : 0.0f, - image_type_3D - ? zOffsetValues[j] - : 0.0f, - norm_offset_x, - (num_dimensions > 1) - ? norm_offset_y - : 0.0f, - image_type_3D - ? norm_offset_z - : 0.0f, - imageSampler, tempOut, - 1 /*verbose*/, - &hasDenormals, lod); + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + imageSampler, tempOut, + 1 /*verbose*/, + &hasDenormals, lod); log_error( "\tulps: %2.2f, %2.2f, " "%2.2f, %2.2f (max " @@ -1632,26 +1626,25 @@ int test_read_image(cl_context context, cl_command_queue queue, j, numTries, numClamped, true, lod); log_error("Step by step:\n"); - FloatPixel temp = - sample_image_pixel_float_offset( - imagePtr, imageInfo, - xOffsetValues[j], - (num_dimensions > 1) - ? yOffsetValues[j] - : 0.0f, - image_type_3D - ? zOffsetValues[j] - : 0.0f, - norm_offset_x, - (num_dimensions > 1) - ? norm_offset_y - : 0.0f, - image_type_3D - ? norm_offset_z - : 0.0f, - imageSampler, tempOut, - 1 /*verbose*/, - &hasDenormals, lod); + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? 
norm_offset_z + : 0.0f, + imageSampler, tempOut, + 1 /*verbose*/, + &hasDenormals, lod); log_error( "\tulps: %2.2f, %2.2f, " "%2.2f, %2.2f (max " diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 9c4e332a..5c7c7b29 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -945,7 +945,7 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double // Validate float results float *resultPtr = (float *)(char *)resultValues; float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + for( size_t y = 0, j = 0; y < height_lod; y++ ) { for( size_t x = 0; x < width_lod; x++, j++ ) From 82508f709a518062917e27258c6086cfb0dfedae Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Thu, 24 Jul 2025 18:43:03 +0100 Subject: [PATCH 10/61] Fix test_spir not checking for the required extension (#2454) `clGetDeviceInfo` should fail with `CL_INVALID_VALUE` when queried for `CL_DEVICE_SPIR_VERSIONS` on devices that do not claim to support the extension that provides it, `cl_khr_spir`. Following this change, the test is skipped instead of failing on devices that do not support `cl_khr_spir`. 
Signed-off-by: Ahmed Hesham --- test_conformance/spir/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 77754a2f..95e8c00f 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -6911,12 +6911,13 @@ int main (int argc, const char* argv[]) cl_device_id device = get_platform_device(device_type, choosen_device_index, choosen_platform_index); printDeviceHeader(device); + REQUIRE_EXTENSION("cl_khr_spir"); + std::vector versions; get_spir_version(device, versions); - if (!is_extension_available(device, "cl_khr_spir") - || (std::find(versions.begin(), versions.end(), Version{ 1, 2 }) - == versions.end())) + if (std::find(versions.begin(), versions.end(), Version{ 1, 2 }) + == versions.end()) { log_info("Spir extension version 1.2 is not supported by the device\n"); return 0; From 16dfa2217c8aa6e99e8da80082226112b3a115ed Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:49:38 +0100 Subject: [PATCH 11/61] Enable -Wnarrowing in non_uniform_work_group test suite. (#2464) Modify the prime numbers generator function to return `size_t` instead of `int` and use `0` as a sentinel value instead of hardcoded negative ones. `0` is not a prime number, so it is suitable to use to indicate an error.
Fixes #1159 --------- Signed-off-by: Ahmed Hesham --- .../non_uniform_work_group/CMakeLists.txt | 6 - .../test_advanced_2d.cpp | 363 ++++++++------ .../test_advanced_3d.cpp | 441 ++++++++++-------- .../test_advanced_other.cpp | 303 +++++++----- .../non_uniform_work_group/test_basic.cpp | 300 +++++++----- .../non_uniform_work_group/tools.cpp | 33 +- .../non_uniform_work_group/tools.h | 7 +- 7 files changed, 843 insertions(+), 610 deletions(-) diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt index 7db5bcbb..6029dbbd 100644 --- a/test_conformance/non_uniform_work_group/CMakeLists.txt +++ b/test_conformance/non_uniform_work_group/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME NON_UNIFORM_WORK_GROUP) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES main.cpp test_advanced_2d.cpp @@ -15,5 +11,3 @@ set(${MODULE_NAME}_SOURCES ) include(../CMakeCommon.txt) - -# end of file # diff --git a/test_conformance/non_uniform_work_group/test_advanced_2d.cpp b/test_conformance/non_uniform_work_group/test_advanced_2d.cpp index 9c0ed964..f764bae8 100644 --- a/test_conformance/non_uniform_work_group/test_advanced_2d.cpp +++ b/test_conformance/non_uniform_work_group/test_advanced_2d.cpp @@ -39,11 +39,13 @@ REGISTER_TEST(non_uniform_2d_basic) // non_uniform_2d_prime_number_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber, maxWgSize}; size_t localSize[] = {maxWgSize/2, 2}; @@ -52,25 +54,31 @@ REGISTER_TEST(non_uniform_2d_basic) // 
non_uniform_2d_two_prime_numbers_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 1759; - size_t globalSize[] = {primeNumber2, primeNumber}; - size_t localSize[] = {16, maxWgSize/16}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1759; + size_t globalSize[] = { primeNumber2, primeNumber }; + size_t localSize[] = { 16, maxWgSize / 16 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_prime_number_basic_2 { - int primeNumber = 1327; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t localSize[] = {maxWgSize/32, 32}; + size_t primeNumber = 1327; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t localSize[] = { maxWgSize / 32, 32 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_combination_of_max_wg_size_basic @@ -83,56 +91,69 @@ REGISTER_TEST(non_uniform_2d_basic) // non_uniform_2d_two_prime_numbers_and_ls_null_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 1669; - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + 
log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1669; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_prime_number_and_ls_null_basic { - unsigned int primeNumber = 1249; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 1249; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_four_prime_numbers_basic { - unsigned int primeNumber = 1951; - unsigned int primeNumber2 = 911; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; + size_t primeNumber = 1951; + size_t primeNumber2 = 911; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_three_prime_numbers_basic { - int 
primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } return exec.status(); @@ -160,11 +181,13 @@ REGISTER_TEST(non_uniform_2d_atomics) // non_uniform_2d_prime_number_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber, maxWgSize}; size_t localSize[] = {maxWgSize/2, 2}; @@ -173,25 +196,31 @@ REGISTER_TEST(non_uniform_2d_atomics) // non_uniform_2d_two_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 1759; - size_t globalSize[] = {primeNumber2, primeNumber}; - size_t localSize[] = {16, maxWgSize/16}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + 
log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1759; + size_t globalSize[] = { primeNumber2, primeNumber }; + size_t localSize[] = { 16, maxWgSize / 16 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_prime_number_atomics_2 { - int primeNumber = 1327; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t localSize[] = {maxWgSize/32, 32}; + size_t primeNumber = 1327; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t localSize[] = { maxWgSize / 32, 32 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_combination_of_max_wg_size_atomics @@ -204,56 +233,69 @@ REGISTER_TEST(non_uniform_2d_atomics) // non_uniform_2d_two_prime_numbers_and_ls_null_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 1669; - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1669; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // 
non_uniform_2d_prime_number_and_ls_null_atomics { - unsigned int primeNumber = 1249; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 1249; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_four_prime_numbers_atomics { - unsigned int primeNumber = 1951; - unsigned int primeNumber2 = 911; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; + size_t primeNumber = 1951; + size_t primeNumber2 = 911; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_three_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, 
maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } return exec.status(); @@ -281,11 +323,13 @@ REGISTER_TEST(non_uniform_2d_barriers) // non_uniform_2d_prime_number_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber, maxWgSize}; size_t localSize[] = {maxWgSize/2, 2}; @@ -294,25 +338,31 @@ REGISTER_TEST(non_uniform_2d_barriers) // non_uniform_2d_two_prime_numbers_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 1759; - size_t globalSize[] = {primeNumber2, primeNumber}; - size_t localSize[] = {16, maxWgSize/16}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1759; + size_t globalSize[] = { primeNumber2, primeNumber }; + size_t localSize[] = { 16, maxWgSize / 16 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, 
localSize, Range::BARRIERS); } // non_uniform_2d_prime_number_barriers_2 { - int primeNumber = 1327; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t localSize[] = {maxWgSize/32, 32}; + size_t primeNumber = 1327; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t localSize[] = { maxWgSize / 32, 32 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_combination_of_max_wg_size_barriers @@ -325,54 +375,67 @@ REGISTER_TEST(non_uniform_2d_barriers) // non_uniform_2d_two_prime_numbers_and_ls_null_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 1669; - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1669; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_prime_number_and_ls_null_barriers { - unsigned int primeNumber = 1249; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 1249; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_four_prime_numbers_barriers { - unsigned int primeNumber = 1951; - unsigned int primeNumber2 = 911; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + size_t primeNumber = 1951; + size_t primeNumber2 = 911; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_three_prime_numbers_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/test_advanced_3d.cpp b/test_conformance/non_uniform_work_group/test_advanced_3d.cpp index a159c8d6..89a30b3f 100644 --- a/test_conformance/non_uniform_work_group/test_advanced_3d.cpp +++ b/test_conformance/non_uniform_work_group/test_advanced_3d.cpp @@ -39,11 +39,13 @@ REGISTER_TEST(non_uniform_3d_basic) // non_uniform_3d_prime_number_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; @@ -52,95 +54,118 @@ REGISTER_TEST(non_uniform_3d_basic) // non_uniform_3d_two_prime_numbers_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 13; - size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; - size_t localSize[] = {8, 4, std::max(maxWgSize/32,1)}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 13; + size_t globalSize[] = { primeNumber2, maxWgSize / 8, primeNumber }; + size_t localSize[] = { 8, 4, std::max(maxWgSize / 32, 1) }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, 
Range::BASIC); } // non_uniform_3d_prime_number_basic_2 { - int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t localSize[] = { 8, std::max(maxWgSize / 32, 1), 4 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_two_prime_numbers_and_ls_null_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 23; - size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 23; + size_t globalSize[] = { primeNumber, primeNumber2, maxWgSize / 16 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_prime_number_and_ls_null_basic { - unsigned int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + 
globalSize, localSize, Range::BASIC); } // non_uniform_3d_three_prime_numbers_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 10711; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; - size_t localSize[] = {primeNumber, 1, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 10711; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 }; + size_t localSize[] = { primeNumber, 1, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_four_prime_numbers_basic { - unsigned int primeNumber = 541; - unsigned int primeNumber2 = 251; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t primeNumber = 541; + size_t primeNumber2 = 251; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_six_prime_numbers_basic { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4,primeNumber5,primeNumber6,maxWgSize ); + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } return exec.status(); @@ -168,11 +193,13 @@ REGISTER_TEST(non_uniform_3d_atomics) // non_uniform_3d_prime_number_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; @@ -181,95 +208,118 @@ REGISTER_TEST(non_uniform_3d_atomics) // 
non_uniform_3d_two_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 13; - size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; - size_t localSize[] = {8, 4, std::max(maxWgSize/32,1)}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 13; + size_t globalSize[] = { primeNumber2, maxWgSize / 8, primeNumber }; + size_t localSize[] = { 8, 4, std::max(maxWgSize / 32, 1) }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_prime_number_atomics_2 { - int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t localSize[] = { 8, std::max(maxWgSize / 32, 1), 4 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_two_prime_numbers_and_ls_null_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 23; - size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 
1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 23; + size_t globalSize[] = { primeNumber, primeNumber2, maxWgSize / 16 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_prime_number_and_ls_null_atomics { - unsigned int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_three_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 10711; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; - size_t localSize[] = {primeNumber, 1, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 10711; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 }; + size_t localSize[] = { primeNumber, 1, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // 
non_uniform_3d_four_prime_numbers_atomics { - unsigned int primeNumber = 541; - unsigned int primeNumber2 = 251; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t primeNumber = 541; + size_t primeNumber2 = 251; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_six_prime_numbers_atomics { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize); + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, 
fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } return exec.status(); @@ -297,11 +347,13 @@ REGISTER_TEST(non_uniform_3d_barriers) // non_uniform_3d_prime_number_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; @@ -310,96 +362,119 @@ REGISTER_TEST(non_uniform_3d_barriers) // non_uniform_3d_two_prime_numbers_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 13; - size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; - size_t localSize[] = {8, 4, std::max(maxWgSize/32,1)}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 13; + size_t globalSize[] = { primeNumber2, maxWgSize / 8, primeNumber }; + size_t localSize[] = { 8, 4, std::max(maxWgSize / 32, 1) }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_prime_number_barriers_2 { - int primeNumber = 113; - size_t globalSize[] = {primeNumber, 
primeNumber, primeNumber}; - size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t localSize[] = { 8, std::max(maxWgSize / 32, 1), 4 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_two_prime_numbers_and_ls_null_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 23; - size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 23; + size_t globalSize[] = { primeNumber, primeNumber2, maxWgSize / 16 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_prime_number_and_ls_null_barriers { - unsigned int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_three_prime_numbers_barriers { - int 
primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 10711; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; - size_t localSize[] = {primeNumber, 1, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 10711; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 }; + size_t localSize[] = { primeNumber, 1, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_four_prime_numbers_barriers { - unsigned int primeNumber = 541; - unsigned int primeNumber2 = 251; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t primeNumber = 541; + size_t primeNumber2 = 251; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } 
// non_uniform_3d_six_prime_numbers_barriers { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4,primeNumber5,primeNumber6,maxWgSize ); + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/test_advanced_other.cpp b/test_conformance/non_uniform_work_group/test_advanced_other.cpp index 63df8c0c..628cc951 100644 --- a/test_conformance/non_uniform_work_group/test_advanced_other.cpp +++ b/test_conformance/non_uniform_work_group/test_advanced_other.cpp @@ -31,73 +31,88 @@ REGISTER_TEST(non_uniform_other_basic) // non_uniform_1d_two_prime_numbers_offset_basic { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = 
PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; - size_t offset[] = {23}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; + size_t offset[] = { 23 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BASIC); } // non_uniform_2d_three_prime_numbers_offset_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; - size_t offset[] = {23, 17}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; + size_t offset[] = { 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BASIC); } // non_uniform_3d_six_prime_numbers_offset_basic { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t 
primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + PrimeNumbers::Result3d fit3dResult; - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t offset[] = {11, 23, 17}; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t offset[] = { 11, 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BASIC); } // non_uniform_3d_six_prime_numbers_rwgs_basic { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 
}; + size_t reqdWorkGroupSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::BASIC); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + NULL, reqdWorkGroupSize, Range::BASIC); } return exec.status(); @@ -117,71 +132,86 @@ REGISTER_TEST(non_uniform_other_atomics) // non_uniform_1d_two_prime_numbers_offset_atomics { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; - size_t offset[] = {23}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; + size_t offset[] = { 23 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::ATOMICS); } // non_uniform_2d_three_prime_numbers_offset_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; - size_t offset[] = {23, 17}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + 
size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; + size_t offset[] = { 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::ATOMICS); } // non_uniform_3d_six_prime_numbers_offset_atomics { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t offset[] = {11, 23, 17}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t offset[] = { 11, 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::ATOMICS); } // non_uniform_3d_six_prime_numbers_rwgs_atomics { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int 
primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t reqdWorkGroupSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + NULL, reqdWorkGroupSize, Range::ATOMICS); } return exec.status(); @@ -201,74 +231,89 @@ REGISTER_TEST(non_uniform_other_barriers) // non_uniform_1d_two_prime_numbers_offset_barriers { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; + size_t globalSize[] = { primeNumber }; - size_t localSize[] = {fit1dResult.Val1}; - size_t offset[] = {23}; + size_t localSize[] = { fit1dResult.Val1 }; + size_t offset[] = { 23 }; - 
exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BARRIERS); } // non_uniform_2d_three_prime_numbers_offset_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; - size_t offset[] = {23, 17}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; + size_t offset[] = { 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BARRIERS); } // non_uniform_3d_six_prime_numbers_offset_barriers { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, 
primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t offset[] = {11, 23, 17}; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t offset[] = { 11, 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BARRIERS); } // non_uniform_3d_six_prime_numbers_rwgs_barriers { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t reqdWorkGroupSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::BARRIERS); + 
exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + NULL, reqdWorkGroupSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/test_basic.cpp b/test_conformance/non_uniform_work_group/test_basic.cpp index 537d7eeb..6abf0870 100644 --- a/test_conformance/non_uniform_work_group/test_basic.cpp +++ b/test_conformance/non_uniform_work_group/test_basic.cpp @@ -39,11 +39,13 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_prime_number_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t localSize[] = {maxWgSize}; @@ -52,20 +54,24 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_max_wg_size_plus_prime_number_basic { - int primeNumber = 11; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 11; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_max_wg_size_plus_prime_number_basic_2 { - int primeNumber = 53; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 53; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + 
/ sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_2max_wg_size_minus_1_basic @@ -78,38 +84,46 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_prime_number_basic_2 { - unsigned int primeNumber = 20101; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 20101; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_prime_number_basic_3 { - unsigned int primeNumber = 42967; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 42967; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_prime_number_basic_4 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_prime_number_and_ls_null_basic_2 { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { 
+ log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t *localSize = NULL; @@ -118,25 +132,29 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_prime_number_and_ls_null_basic_3 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_two_prime_numbers_basic { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } return exec.status(); @@ -164,11 +182,13 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_prime_number_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t 
localSize[] = {maxWgSize}; @@ -177,20 +197,24 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_max_wg_size_plus_prime_number_atomics { - int primeNumber = 11; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 11; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_max_wg_size_plus_prime_number_atomics_2 { - int primeNumber = 53; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 53; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_2max_wg_size_minus_1_atomics @@ -203,38 +227,46 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_prime_number_atomics_2 { - unsigned int primeNumber = 20101; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 20101; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_prime_number_atomics_3 { - unsigned int primeNumber = 42967; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 42967; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { 
maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_prime_number_atomics_4 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_prime_number_and_ls_null_atomics_2 { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t *localSize = NULL; @@ -243,25 +275,29 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_prime_number_and_ls_null_atomics_3 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_two_prime_numbers_atomics { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; 
+ PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } return exec.status(); @@ -289,11 +325,13 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_prime_number_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t localSize[] = {maxWgSize}; @@ -302,20 +340,24 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_max_wg_size_plus_prime_number_barriers { - int primeNumber = 11; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 11; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_max_wg_size_plus_prime_number_barriers_2 { - int primeNumber = 53; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 53; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = 
{ maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_2max_wg_size_minus_1_barriers @@ -328,38 +370,46 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_prime_number_barriers_2 { - unsigned int primeNumber = 20101; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 20101; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_prime_number_barriers_3 { - unsigned int primeNumber = 42967; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 42967; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_prime_number_barriers_4 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_prime_number_and_ls_null_barriers_2 { - int primeNumber = 
PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t *localSize = NULL; @@ -368,26 +418,30 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_prime_number_and_ls_null_barriers_3 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_two_prime_numbers_barriers { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/tools.cpp b/test_conformance/non_uniform_work_group/tools.cpp index 9c0f8f6d..7cbd3500 100644 --- a/test_conformance/non_uniform_work_group/tools.cpp +++ 
b/test_conformance/non_uniform_work_group/tools.cpp @@ -46,28 +46,31 @@ void PrimeNumbers::generatePrimeNumbers (unsigned int maxValue) { } // Returns prime number for specified range -int PrimeNumbers::getPrimeNumberInRange (size_t lowerValue, size_t higherValue) { - if(lowerValue >= higherValue) - return -1; +size_t PrimeNumbers::getPrimeNumberInRange(size_t lowerValue, + size_t higherValue) +{ + if (lowerValue >= higherValue) return 0; - if(primeNumbers.back() < lowerValue) - return -2; + if (primeNumbers.back() < lowerValue) return 0; - PrimeNumbersCollection::iterator it = primeNumbers.begin(); + PrimeNumbersCollection::iterator it = primeNumbers.begin(); - for (; it != primeNumbers.end(); ++it) { - if (lowerValue<*it) { - if(higherValue>*it) - return *it; - else - return -3; + for (; it != primeNumbers.end(); ++it) + { + if (lowerValue < *it) + { + if (higherValue > *it) + return *it; + else + return 0; + } } - } - return -1; + return 0; } -int PrimeNumbers::getNextLowerPrimeNumber(size_t upperValue) { +size_t PrimeNumbers::getNextLowerPrimeNumber(size_t upperValue) +{ size_t retVal = 1; PrimeNumbersCollection::iterator it = primeNumbers.begin(); diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h index 8e235c3a..29d31d39 100644 --- a/test_conformance/non_uniform_work_group/tools.h +++ b/test_conformance/non_uniform_work_group/tools.h @@ -23,8 +23,7 @@ #include #include -typedef std::vector PrimeNumbersCollection; - +typedef std::vector<size_t> PrimeNumbersCollection; // Class responsible for distributing prime numbers @@ -47,8 +46,8 @@ public: }; static void generatePrimeNumbers (unsigned int maxValue); - static int getPrimeNumberInRange (size_t lowerValue, size_t higherValue); - static int getNextLowerPrimeNumber (size_t upperValue); + static size_t getPrimeNumberInRange(size_t lowerValue, size_t higherValue); + static size_t getNextLowerPrimeNumber(size_t upperValue); static Result1d fitMaxPrime1d(size_t
Val1, size_t productMax); // Return val1 and Val2 which are largest prime numbers who's product is <= productMax static Result2d fitMaxPrime2d(size_t Val1, size_t Val2, size_t productMax); From 7c530dafa66108ad28d0215b7169a23571147b8e Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:51:05 +0100 Subject: [PATCH 12/61] Enable -Wnarrowing for conversions test suite (#2466) No code changes required. Fixes #1157 Signed-off-by: Ahmed Hesham --- test_conformance/conversions/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt index 32990eba..e2e97667 100644 --- a/test_conformance/conversions/CMakeLists.txt +++ b/test_conformance/conversions/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME CONVERSIONS) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set (${MODULE_NAME}_SOURCES Sleep.cpp test_conversions.cpp basic_test_conversions.cpp ) From 77a987d547e2e6262e83bbc106a2400eea6486c5 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:51:54 +0100 Subject: [PATCH 13/61] Enable -Wnarrowing in integer_ops test suite (#2465) Fixes #1158 --------- Signed-off-by: Ahmed Hesham --- test_conformance/integer_ops/CMakeLists.txt | 4 --- test_conformance/integer_ops/main.cpp | 34 ++++++++++++++----- .../verification_and_generation_functions.cpp | 8 +++-- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt index 9966ca6f..7bc991f8 100644 --- a/test_conformance/integer_ops/CMakeLists.txt +++ b/test_conformance/integer_ops/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME INTEGER_OPS) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - 
add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES main.cpp test_int_basic_ops.cpp diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp index 32849af7..cd90d1bf 100644 --- a/test_conformance/integer_ops/main.cpp +++ b/test_conformance/integer_ops/main.cpp @@ -26,14 +26,32 @@ void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d ) { - static const cl_long sUniqueValues[] = { 0x3333333333333333LL, 0x5555555555555555LL, 0x9999999999999999LL, 0xaaaaaaaaaaaaaaaaLL, 0xccccccccccccccccLL, - 0x3030303030303030LL, 0x5050505050505050LL, 0x9090909090909090LL, 0xa0a0a0a0a0a0a0a0LL, 0xc0c0c0c0c0c0c0c0LL, 0xf0f0f0f0f0f0f0f0LL, - 0x0303030303030303LL, 0x0505050505050505LL, 0x0909090909090909LL, 0x0a0a0a0a0a0a0a0aLL, 0x0c0c0c0c0c0c0c0cLL, 0x0f0f0f0f0f0f0f0fLL, - 0x3300330033003300LL, 0x5500550055005500LL, 0x9900990099009900LL, 0xaa00aa00aa00aa00LL, 0xcc00cc00cc00cc00LL, 0xff00ff00ff00ff00LL, - 0x0033003300330033LL, 0x0055005500550055LL, 0x0099009900990099LL, 0x00aa00aa00aa00aaLL, 0x00cc00cc00cc00ccLL, 0x00ff00ff00ff00ffLL, - 0x3333333300000000LL, 0x5555555500000000LL, 0x9999999900000000LL, 0xaaaaaaaa00000000LL, 0xcccccccc00000000LL, 0xffffffff00000000LL, - 0x0000000033333333LL, 0x0000000055555555LL, 0x0000000099999999LL, 0x00000000aaaaaaaaLL, 0x00000000ccccccccLL, 0x00000000ffffffffLL, - 0x3333000000003333LL, 0x5555000000005555LL, 0x9999000000009999LL, 0xaaaa00000000aaaaLL, 0xcccc00000000ccccLL, 0xffff00000000ffffLL}; + static const cl_long sUniqueValues[] = { + (cl_long)0x3333333333333333LL, (cl_long)0x5555555555555555LL, + (cl_long)0x9999999999999999LL, (cl_long)0xaaaaaaaaaaaaaaaaLL, + (cl_long)0xccccccccccccccccLL, (cl_long)0x3030303030303030LL, + (cl_long)0x5050505050505050LL, (cl_long)0x9090909090909090LL, + (cl_long)0xa0a0a0a0a0a0a0a0LL, (cl_long)0xc0c0c0c0c0c0c0c0LL, + (cl_long)0xf0f0f0f0f0f0f0f0LL, (cl_long)0x0303030303030303LL, + (cl_long)0x0505050505050505LL, 
(cl_long)0x0909090909090909LL, + (cl_long)0x0a0a0a0a0a0a0a0aLL, (cl_long)0x0c0c0c0c0c0c0c0cLL, + (cl_long)0x0f0f0f0f0f0f0f0fLL, (cl_long)0x3300330033003300LL, + (cl_long)0x5500550055005500LL, (cl_long)0x9900990099009900LL, + (cl_long)0xaa00aa00aa00aa00LL, (cl_long)0xcc00cc00cc00cc00LL, + (cl_long)0xff00ff00ff00ff00LL, (cl_long)0x0033003300330033LL, + (cl_long)0x0055005500550055LL, (cl_long)0x0099009900990099LL, + (cl_long)0x00aa00aa00aa00aaLL, (cl_long)0x00cc00cc00cc00ccLL, + (cl_long)0x00ff00ff00ff00ffLL, (cl_long)0x3333333300000000LL, + (cl_long)0x5555555500000000LL, (cl_long)0x9999999900000000LL, + (cl_long)0xaaaaaaaa00000000LL, (cl_long)0xcccccccc00000000LL, + (cl_long)0xffffffff00000000LL, (cl_long)0x0000000033333333LL, + (cl_long)0x0000000055555555LL, (cl_long)0x0000000099999999LL, + (cl_long)0x00000000aaaaaaaaLL, (cl_long)0x00000000ccccccccLL, + (cl_long)0x00000000ffffffffLL, (cl_long)0x3333000000003333LL, + (cl_long)0x5555000000005555LL, (cl_long)0x9999000000009999LL, + (cl_long)0xaaaa00000000aaaaLL, (cl_long)0xcccc00000000ccccLL, + (cl_long)0xffff00000000ffffLL + }; static cl_long sSpecialValues[ 128 + 128 + 128 + ( sizeof( sUniqueValues ) / sizeof( sUniqueValues[ 0 ] ) ) ] = { 0 }; if( sSpecialValues[ 0 ] == 0 ) diff --git a/test_conformance/integer_ops/verification_and_generation_functions.cpp b/test_conformance/integer_ops/verification_and_generation_functions.cpp index 9a7abf78..4262afb4 100644 --- a/test_conformance/integer_ops/verification_and_generation_functions.cpp +++ b/test_conformance/integer_ops/verification_and_generation_functions.cpp @@ -1370,7 +1370,9 @@ verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) { - static const cl_ushort specialCaseList[] = { 0, -1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX }; + static const cl_ushort specialCaseList[] = { + 0, (cl_ushort)-1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX + }; 
int j; // Set the inputs to a random number @@ -1812,7 +1814,9 @@ verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, c void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) { - static const cl_uchar specialCaseList[] = { 0, -1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX }; + static const cl_uchar specialCaseList[] = { + 0, (cl_uchar)-1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX + }; int j; // FIXME: we really should just check every char against every char here From 044e0be65321690ae18cae555b84ccf0c163e5a7 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:24:03 +0100 Subject: [PATCH 14/61] Enable -Wnarrowing for the basic test suite (#2467) Fixes #1156 --------- Signed-off-by: Ahmed Hesham --- test_conformance/basic/CMakeLists.txt | 4 --- test_conformance/basic/test_arraycopy.cpp | 2 +- .../basic/test_arrayimagecopy.cpp | 6 ++-- test_conformance/basic/test_hostptr.cpp | 4 +-- test_conformance/basic/test_if.cpp | 5 +-- .../basic/test_image_multipass.cpp | 8 ++--- .../basic/test_imagearraycopy.cpp | 6 ++-- test_conformance/basic/test_imagecopy.cpp | 15 ++++---- test_conformance/basic/test_imagecopy3d.cpp | 6 ++-- test_conformance/basic/test_imagenpot.cpp | 4 +-- .../basic/test_imagerandomcopy.cpp | 10 +++--- .../basic/test_imagereadwrite.cpp | 21 ++++++----- .../basic/test_imagereadwrite3d.cpp | 35 ++++++++++++------- .../test_kernel_call_kernel_function.cpp | 2 +- .../basic/test_multireadimagemultifmt.cpp | 4 +-- .../basic/test_multireadimageonefmt.cpp | 4 +-- 16 files changed, 73 insertions(+), 63 deletions(-) diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index 7292bc9d..bf1f3bd6 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME BASIC) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES 
"(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES main.cpp test_fpmath.cpp diff --git a/test_conformance/basic/test_arraycopy.cpp b/test_conformance/basic/test_arraycopy.cpp index a981cd02..332b10e2 100644 --- a/test_conformance/basic/test_arraycopy.cpp +++ b/test_conformance/basic/test_arraycopy.cpp @@ -161,7 +161,7 @@ REGISTER_TEST(arraycopy) err |= clSetKernelArg(kernel, 1, sizeof results, &results); test_error(err, "clSetKernelArg failed"); - size_t threads[3] = { num_elements, 0, 0 }; + size_t threads[3] = { static_cast(num_elements), 0, 0 }; err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); test_error(err, "clEnqueueNDRangeKernel failed"); diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp index 8a8f9381..bb44abff 100644 --- a/test_conformance/basic/test_arrayimagecopy.cpp +++ b/test_conformance/basic/test_arrayimagecopy.cpp @@ -35,9 +35,9 @@ static int test_arrayimagecopy_single_format( std::unique_ptr bufptr{ nullptr, free }, imgptr{ nullptr, free }; clMemWrapper buffer, image; - int img_width = 512; - int img_height = 512; - int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1; + size_t img_width = 512; + size_t img_height = 512; + size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 
32 : 1; size_t elem_size; size_t buffer_size; cl_int err; diff --git a/test_conformance/basic/test_hostptr.cpp b/test_conformance/basic/test_hostptr.cpp index e58b636e..9f3f700e 100644 --- a/test_conformance/basic/test_hostptr.cpp +++ b/test_conformance/basic/test_hostptr.cpp @@ -100,8 +100,8 @@ REGISTER_TEST(hostptr) cl_image_format img_format; cl_uchar *rgba8_inptr, *rgba8_outptr; void *lock_buffer; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; cl_int err; MTdata d; RoundingMode oldRoundMode; diff --git a/test_conformance/basic/test_if.cpp b/test_conformance/basic/test_if.cpp index a0640879..b6b8c449 100644 --- a/test_conformance/basic/test_if.cpp +++ b/test_conformance/basic/test_if.cpp @@ -56,8 +56,9 @@ __kernel void test_if(__global int *src, __global int *dst) int verify_if(std::vector input, std::vector output) { const cl_int results[] = { - 0x12345678, 0x23456781, 0x34567812, 0x45678123, - 0x56781234, 0x67812345, 0x78123456, 0x81234567, + (cl_int)0x12345678, (cl_int)0x23456781, (cl_int)0x34567812, + (cl_int)0x45678123, (cl_int)0x56781234, (cl_int)0x67812345, + (cl_int)0x78123456, (cl_int)0x81234567, }; auto predicate = [&results](cl_int a, cl_int b) { diff --git a/test_conformance/basic/test_image_multipass.cpp b/test_conformance/basic/test_image_multipass.cpp index 5d8ae993..e7af8726 100644 --- a/test_conformance/basic/test_image_multipass.cpp +++ b/test_conformance/basic/test_image_multipass.cpp @@ -144,8 +144,8 @@ verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int REGISTER_TEST(image_multipass_integer_coord) { - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; cl_image_format img_format; int num_input_streams = 8; @@ -397,8 +397,8 @@ REGISTER_TEST(image_multipass_integer_coord) REGISTER_TEST(image_multipass_float_coord) { - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; 
cl_image_format img_format; int num_input_streams = 8; diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp index 7f177ff4..a400c460 100644 --- a/test_conformance/basic/test_imagearraycopy.cpp +++ b/test_conformance/basic/test_imagearraycopy.cpp @@ -35,9 +35,9 @@ static int test_imagearraycopy_single_format( std::unique_ptr bufptr{ nullptr, free }, imgptr{ nullptr, free }; clMemWrapper buffer, image; - const int img_width = 512; - const int img_height = 512; - const int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1; + const size_t img_width = 512; + const size_t img_height = 512; + const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1; size_t elem_size; size_t buffer_size; cl_int err; diff --git a/test_conformance/basic/test_imagecopy.cpp b/test_conformance/basic/test_imagecopy.cpp index 22bdea11..5e888594 100644 --- a/test_conformance/basic/test_imagecopy.cpp +++ b/test_conformance/basic/test_imagecopy.cpp @@ -111,8 +111,8 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context, std::unique_ptr rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; clMemWrapper streams[6]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, err; MTdataHolder d(gRandomSeed); @@ -153,7 +153,7 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context, for (i = 0; i < 3; i++) { void *p, *outp; - int x, y, delta_w = img_width / 8, delta_h = img_height / 16; + size_t x, y, delta_w = img_width / 8, delta_h = img_height / 16; switch (i) { @@ -197,10 +197,11 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context, copy_origin, copy_region, 0, NULL, NULL); if (err) { - log_error("Copy %d (origin [%d, %d], size [%d, %d], image " - "size [%d x %d]) Failed\n", - copy_number, x, y, delta_w, delta_h, img_width, - img_height); + log_error( + "Copy %d (origin [%zu, %zu], size [%zu, %zu], 
image " + "size [%zu x %zu]) Failed\n", + copy_number, x, y, delta_w, delta_h, img_width, + img_height); } test_error(err, "clEnqueueCopyImage failed"); } diff --git a/test_conformance/basic/test_imagecopy3d.cpp b/test_conformance/basic/test_imagecopy3d.cpp index 5de2e3ae..53a88bd5 100644 --- a/test_conformance/basic/test_imagecopy3d.cpp +++ b/test_conformance/basic/test_imagecopy3d.cpp @@ -115,9 +115,9 @@ static int test_imagecopy3d_impl(cl_device_id device, cl_context context, std::unique_ptr rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; clMemWrapper streams[6]; - int img_width = 128; - int img_height = 128; - int img_depth = 64; + size_t img_width = 128; + size_t img_height = 128; + size_t img_depth = 64; int i; cl_int err; unsigned num_elements = img_width * img_height * img_depth * 4; diff --git a/test_conformance/basic/test_imagenpot.cpp b/test_conformance/basic/test_imagenpot.cpp index 1e2c213e..566cb9c3 100644 --- a/test_conformance/basic/test_imagenpot.cpp +++ b/test_conformance/basic/test_imagenpot.cpp @@ -82,8 +82,8 @@ REGISTER_TEST(imagenpot) cl_kernel kernel; size_t global_threads[3], local_threads[3]; size_t local_workgroup_size; - int img_width; - int img_height; + size_t img_width; + size_t img_height; int err; cl_uint m; size_t max_local_workgroup_size[3]; diff --git a/test_conformance/basic/test_imagerandomcopy.cpp b/test_conformance/basic/test_imagerandomcopy.cpp index 79e6b749..748da6fb 100644 --- a/test_conformance/basic/test_imagerandomcopy.cpp +++ b/test_conformance/basic/test_imagerandomcopy.cpp @@ -123,15 +123,15 @@ REGISTER_TEST(imagerandomcopy) unsigned short *rgba16_inptr, *rgba16_outptr; float *rgbafp_inptr, *rgbafp_outptr; clMemWrapper streams[6]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, j; cl_int err; MTdata d; PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) - log_info("Testing with image %d x %d.\n", img_width, img_height); + log_info("Testing with 
image %zu x %zu.\n", img_width, img_height); d = init_genrand( gRandomSeed ); rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d); @@ -191,8 +191,8 @@ REGISTER_TEST(imagerandomcopy) } size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1}; - err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL); -// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL); + err = clEnqueueWriteImage(queue, streams[i * 2], CL_TRUE, origin, + region, 0, 0, p, 0, NULL, NULL); test_error(err, "clEnqueueWriteImage failed"); for (j=0; j rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; clMemWrapper streams[3]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int num_tries = 200; int i, j, err; MTdataHolder d(gRandomSeed); @@ -242,10 +242,10 @@ REGISTER_TEST(imagereadwrite) for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++) { - int x = (int)get_random_float(0, img_width, d); - int y = (int)get_random_float(0, img_height, d); - int w = (int)get_random_float(1, (img_width - x), d); - int h = (int)get_random_float(1, (img_height - y), d); + size_t x = (size_t)get_random_float(0, img_width, d); + size_t y = (size_t)get_random_float(0, img_height, d); + size_t w = (size_t)get_random_float(1, (img_width - x), d); + size_t h = (size_t)get_random_float(1, (img_height - y), d); size_t input_pitch; int set_input_pitch = (int)(genrand_int32(d) & 0x01); int packed_update = (int)(genrand_int32(d) & 0x01); @@ -386,7 +386,8 @@ REGISTER_TEST(imagereadwrite) img_width, img_height); if (err) { - log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i); + log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x, + y, w, h, (int)input_pitch, (int)i); log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -396,7 +397,8 @@ 
REGISTER_TEST(imagereadwrite) img_width, img_height); if (err) { - log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i); + log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x, + y, w, h, (int)input_pitch, (int)i); log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -406,7 +408,8 @@ REGISTER_TEST(imagereadwrite) img_width, img_height); if (err) { - log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i); + log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x, + y, w, h, (int)input_pitch, (int)i); log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name); } break; diff --git a/test_conformance/basic/test_imagereadwrite3d.cpp b/test_conformance/basic/test_imagereadwrite3d.cpp index f384c339..6fb220b8 100644 --- a/test_conformance/basic/test_imagereadwrite3d.cpp +++ b/test_conformance/basic/test_imagereadwrite3d.cpp @@ -205,10 +205,10 @@ REGISTER_TEST(imagereadwrite3d) std::unique_ptr rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; clMemWrapper streams[3]; - int img_width = 64; - int img_height = 64; - int img_depth = 32; - int img_slice = img_width * img_height; + size_t img_width = 64; + size_t img_height = 64; + size_t img_depth = 32; + size_t img_slice = img_width * img_height; int num_tries = 30; int i, j, err; MTdataHolder mtData(gRandomSeed); @@ -257,12 +257,12 @@ REGISTER_TEST(imagereadwrite3d) for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++) { - int x = (int)get_random_float(0, (float)img_width - 1, mtData); - int y = (int)get_random_float(0, (float)img_height - 1, mtData); - int z = (int)get_random_float(0, (float)img_depth - 1, mtData); - int w = (int)get_random_float(1, (float)(img_width - x), mtData); - int h = (int)get_random_float(1, (float)(img_height - y), mtData); - int d = (int)get_random_float(1, (float)(img_depth - z), mtData); + size_t x = 
(size_t)get_random_float(0, (float)img_width - 1, mtData); + size_t y = (size_t)get_random_float(0, (float)img_height - 1, mtData); + size_t z = (size_t)get_random_float(0, (float)img_depth - 1, mtData); + size_t w = (size_t)get_random_float(1, (float)(img_width - x), mtData); + size_t h = (size_t)get_random_float(1, (float)(img_height - y), mtData); + size_t d = (size_t)get_random_float(1, (float)(img_depth - z), mtData); size_t input_pitch, input_slice_pitch; int set_input_pitch = (int)(genrand_int32(mtData) & 0x01); int packed_update = (int)(genrand_int32(mtData) & 0x01); @@ -401,7 +401,10 @@ REGISTER_TEST(imagereadwrite3d) img_width, img_height, img_depth); if (err) { - log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i); + log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, " + "slice_pitch=%d, try=%d\n", + x, y, z, w, h, d, (int)input_pitch, + (int)input_slice_pitch, (int)i); log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -411,7 +414,10 @@ REGISTER_TEST(imagereadwrite3d) img_width, img_height, img_depth); if (err) { - log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i); + log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, " + "slice_pitch=%d, try=%d\n", + x, y, z, w, h, d, (int)input_pitch, + (int)input_slice_pitch, (int)i); log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -421,7 +427,10 @@ REGISTER_TEST(imagereadwrite3d) img_width, img_height, img_depth); if (err) { - log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i); + log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, " + "slice_pitch=%d, try=%d\n", + x, y, z, w, h, d, (int)input_pitch, + (int)input_slice_pitch, (int)i); 
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name); } break; diff --git a/test_conformance/basic/test_kernel_call_kernel_function.cpp b/test_conformance/basic/test_kernel_call_kernel_function.cpp index 0669ee24..82cb1a65 100644 --- a/test_conformance/basic/test_kernel_call_kernel_function.cpp +++ b/test_conformance/basic/test_kernel_call_kernel_function.cpp @@ -66,7 +66,7 @@ REGISTER_TEST(kernel_call_kernel_function) clKernelWrapper kernel1, kernel2, kernel_to_call; clMemWrapper streams[2]; - size_t threads[] = {num_elements,1,1}; + size_t threads[] = { static_cast(num_elements), 1, 1 }; cl_int *input, *output, *expected; cl_int times = 4; int pass = 0; diff --git a/test_conformance/basic/test_multireadimagemultifmt.cpp b/test_conformance/basic/test_multireadimagemultifmt.cpp index b92daf88..8a16ca85 100644 --- a/test_conformance/basic/test_multireadimagemultifmt.cpp +++ b/test_conformance/basic/test_multireadimagemultifmt.cpp @@ -117,8 +117,8 @@ REGISTER_TEST(mri_multiple) cl_program program; cl_kernel kernel; size_t threads[2]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, err; MTdata d; diff --git a/test_conformance/basic/test_multireadimageonefmt.cpp b/test_conformance/basic/test_multireadimageonefmt.cpp index 1d0b5b8d..8a37e29b 100644 --- a/test_conformance/basic/test_multireadimageonefmt.cpp +++ b/test_conformance/basic/test_multireadimageonefmt.cpp @@ -100,8 +100,8 @@ REGISTER_TEST(mri_one) cl_program program; cl_kernel kernel; size_t threads[2]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, err; size_t origin[3] = {0, 0, 0}; size_t region[3] = {img_width, img_height, 1}; From dae998ff9ded76ddf85967bf2e12f1675e1e980a Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:24:30 +0100 Subject: [PATCH 15/61] Enable -Wnarrowing in spirv_new test 
suite (#2468) --- test_conformance/spirv_new/CMakeLists.txt | 4 ---- test_conformance/spirv_new/test_spirv_14.cpp | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt index 13a258e5..e32d600b 100644 --- a/test_conformance/spirv_new/CMakeLists.txt +++ b/test_conformance/spirv_new/CMakeLists.txt @@ -38,10 +38,6 @@ set(TEST_HARNESS_SOURCES ../../test_conformance/math_brute_force/utility.cpp ) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES ${${MODULE_NAME}_SOURCES} ${TEST_HARNESS_SOURCES}) include(../CMakeCommon.txt) diff --git a/test_conformance/spirv_new/test_spirv_14.cpp b/test_conformance/spirv_new/test_spirv_14.cpp index 6fc5e864..01df8dfd 100644 --- a/test_conformance/spirv_new/test_spirv_14.cpp +++ b/test_conformance/spirv_new/test_spirv_14.cpp @@ -26,10 +26,10 @@ static int test_image_operand_helper(cl_device_id deviceID, cl_context context, { const char* filename = signExtend ? "spv1.4/image_operand_signextend" : "spv1.4/image_operand_zeroextend"; - cl_image_format image_format = { - CL_RGBA, - signExtend ? CL_SIGNED_INT8 : CL_UNSIGNED_INT8, - }; + cl_image_format image_format = {}; + image_format.image_channel_order = CL_RGBA; + image_format.image_channel_data_type = + signExtend ? 
CL_SIGNED_INT8 : CL_UNSIGNED_INT8; cl_int error = CL_SUCCESS; From 430727cc8d74630f9d1c407befbf8048251b9078 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:24:51 +0100 Subject: [PATCH 16/61] Enable -Wnarrowing in Vulkan test suite (#2469) --- test_conformance/common/vulkan_wrapper/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test_conformance/common/vulkan_wrapper/CMakeLists.txt b/test_conformance/common/vulkan_wrapper/CMakeLists.txt index 42397998..f392f9c4 100644 --- a/test_conformance/common/vulkan_wrapper/CMakeLists.txt +++ b/test_conformance/common/vulkan_wrapper/CMakeLists.txt @@ -8,7 +8,6 @@ set(VULKAN_WRAPPER_SOURCES # needed by Vulkan wrapper to compile set(CMAKE_COMPILE_WARNING_AS_ERROR OFF) add_cxx_flag_if_supported(-Wmisleading-indentation) -add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Wno-error) add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive From 2507f7de841d9d577998565270d8bd00fe5ae3c9 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Fri, 1 Aug 2025 13:37:16 -0700 Subject: [PATCH 17/61] add cl_khr_spirv_queries to list of known extensions (#2463) While #2409 is under review, could we please add "cl_khr_spirv_queries" to the list of known extensions? This will prevent test "failures" for implementations that support the extension. 
--- .../compiler/test_compiler_defines_for_extensions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 067ee8ed..c3f3993d 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -96,7 +96,8 @@ const char *known_extensions[] = { "cl_khr_command_buffer_mutable_dispatch", "cl_khr_command_buffer_multi_device", "cl_khr_external_memory_android_hardware_buffer", - "cl_khr_unified_svm" + "cl_khr_unified_svm", + "cl_khr_spirv_queries" }; // clang-format on From 9ca0126c5474552f82a349663ab895ebec017179 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Mon, 4 Aug 2025 11:06:18 +0100 Subject: [PATCH 18/61] Fix warnings treated as errors in Vulkan tests. (#2461) Unused variables and mismatched sign comparisons. 
Signed-off-by: Ahmed Hesham --- test_conformance/vulkan/main.cpp | 2 +- .../vulkan/test_vulkan_api_consistency.cpp | 14 +++---- ...st_vulkan_api_consistency_for_1dimages.cpp | 4 +- ...st_vulkan_api_consistency_for_3dimages.cpp | 4 +- .../vulkan/test_vulkan_interop_buffer.cpp | 40 +++++++++---------- .../vulkan/test_vulkan_interop_image.cpp | 21 +++++----- 6 files changed, 43 insertions(+), 42 deletions(-) diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp index aa32dd3a..85c1c7db 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -50,7 +50,7 @@ static void printUsage(const char *execName) log_info("Usage: %s [test_names] [options]\n", execName); log_info("Test names:\n"); - for (int i = 0; i < test_registry::getInstance().num_tests(); i++) + for (unsigned int i = 0; i < test_registry::getInstance().num_tests(); i++) { log_info("\t%s\n", test_registry::getInstance().definitions()[i].name); } diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index 7410cc7f..5305e48c 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -84,8 +84,8 @@ struct ConsistencyExternalBufferTest : public VulkanTestBase vkDeviceMem->bindBuffer(vkBufferList[0], 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, @@ -243,8 +243,8 @@ struct ConsistencyExternalImageTest : public VulkanTestBase vkExternalMemoryHandleType); vkDeviceMem->bindImage(vkImage2D, 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, (cl_mem_properties)device, @@ -386,9 +386,9 @@ struct ConsistencyExternalSemaphoreTest : public VulkanTestBase 
VulkanSemaphore vkCl2Vksemaphore(*vkDevice, semaphoreHandleType); cl_semaphore_khr clCl2Vksemaphore; cl_semaphore_khr clVk2Clsemaphore; - void* handle1 = NULL; - void* handle2 = NULL; - int fd1, fd2; + [[maybe_unused]] void* handle1 = NULL; + [[maybe_unused]] void* handle2 = NULL; + [[maybe_unused]] int fd1, fd2; std::vector sema_props1{ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, diff --git a/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp index b6797f0c..ef7ae348 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp @@ -101,8 +101,8 @@ struct ConsistencyExternalImage1DTest : public VulkanTestBase vkExternalMemoryHandleType); vkDeviceMem->bindImage(vkImage1D, 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, (cl_mem_properties)device, diff --git a/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp index e84954e9..3d697b0e 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp @@ -103,8 +103,8 @@ struct ConsistencyExternalImage3DTest : public VulkanTestBase vkExternalMemoryHandleType); vkDeviceMem->bindImage(vkImage3D, 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, (cl_mem_properties)device, diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index b90514c3..f50c457a 100644 --- 
a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -272,7 +272,7 @@ int run_test_with_two_queue( err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem), (void *)&(buffers[0])); - for (int i = 0; i < vkBufferList.size() - 1; i++) + for (size_t i = 0; i < vkBufferList.size() - 1; i++) { err |= clSetKernelArg(update_buffer_kernel, i + 1, @@ -352,7 +352,7 @@ int run_test_with_two_queue( "Error: Failed read output, error\n"); int calc_max_iter; - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { if (i == 0) calc_max_iter = (maxIter * 3); @@ -602,7 +602,7 @@ int run_test_with_one_queue( err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { err |= clSetKernelArg(update_buffer_kernel, i + 1, @@ -662,7 +662,7 @@ int run_test_with_one_queue( "Error: clEnqueueWriteBuffer \n"); int calc_max_iter = (maxIter * 2); - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), (void *)&(buffers[i])); @@ -836,7 +836,7 @@ int run_test_with_multi_import_same_ctx( vkExternalMemoryHandleType)); std::vector pExternalMemory; - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { pExternalMemory.push_back( new clExternalMemory(vkBufferListDeviceMemory[bIdx], @@ -857,7 +857,7 @@ int run_test_with_multi_import_same_ctx( { vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { buffers[bIdx][cl_bIdx] = externalMemory[bIdx][cl_bIdx] ->getExternalMemoryBuffer(); @@ -916,7 +916,7 @@ int run_test_with_multi_import_same_ctx( { err = clSetKernelArg(update_buffer_kernel, 
0, sizeof(uint32_t), (void *)&bufferSize); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err |= clSetKernelArg( update_buffer_kernel, i + 1, sizeof(cl_mem), @@ -939,7 +939,7 @@ int run_test_with_multi_import_same_ctx( "Error: Failed to launch " "update_buffer_kernel, error\n "); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clEnqueueReleaseExternalMemObjectsKHRptr( cmd_queue1, 1, &buffers[i][launchIter], 0, nullptr, @@ -979,7 +979,7 @@ int run_test_with_multi_import_same_ctx( calc_max_iter = maxIter * (numImports + 1); - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), (void *)&(buffers[i][0])); @@ -1014,7 +1014,7 @@ int run_test_with_multi_import_same_ctx( } for (size_t i = 0; i < vkBufferList.size(); i++) { - for (size_t j = 0; j < numImports; j++) + for (int j = 0; j < numImports; j++) { delete externalMemory[i][j]; } @@ -1184,7 +1184,7 @@ int run_test_with_multi_import_diff_ctx( vkExternalMemoryHandleType)); std::vector pExternalMemory1; std::vector pExternalMemory2; - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { pExternalMemory1.push_back( new clExternalMemory(vkBufferListDeviceMemory[bIdx], @@ -1209,7 +1209,7 @@ int run_test_with_multi_import_diff_ctx( { vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { buffers1[bIdx][cl_bIdx] = externalMemory1[bIdx][cl_bIdx] ->getExternalMemoryBuffer(); @@ -1226,7 +1226,7 @@ int run_test_with_multi_import_diff_ctx( vkCommandBuffer.dispatch(512, 1, 1); vkCommandBuffer.end(); - for (int i = 0; i < numImports; i++) + for (uint32_t i = 0; i < numImports; i++) { update_buffer_kernel1[i] = (numBuffers == 1) ? 
kernel1[0] @@ -1282,7 +1282,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Failed to set kernel arg"); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clSetKernelArg( update_buffer_kernel1[launchIter], i + 1, @@ -1307,7 +1307,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Error: Failed to launch " "update_buffer_kernel, error\n"); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clEnqueueReleaseExternalMemObjectsKHRptr( cmd_queue1, 1, &buffers1[i][launchIter], 0, nullptr, @@ -1369,7 +1369,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Failed to set kernel arg"); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clSetKernelArg( update_buffer_kernel2[launchIter], i + 1, @@ -1394,7 +1394,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Error: Failed to launch " "update_buffer_kernel, error\n "); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clEnqueueReleaseExternalMemObjectsKHRptr( cmd_queue2, 1, &buffers2[i][launchIter], 0, nullptr, @@ -1442,7 +1442,7 @@ int run_test_with_multi_import_diff_ctx( "Error: Failed read output, error \n"); calc_max_iter = maxIter * 2 * (numBuffers + 1); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), (void *)&(buffers1[i][0])); @@ -1477,7 +1477,7 @@ int run_test_with_multi_import_diff_ctx( } } *error_3 = 0; - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem), (void *)&(buffers2[i][0])); @@ -1513,7 +1513,7 @@ int run_test_with_multi_import_diff_ctx( } for (size_t i = 0; i < vkBufferList.size(); i++) { - for (size_t j = 0; j < numImports; 
j++) + for (int j = 0; j < numImports; j++) { delete externalMemory1[i][j]; delete externalMemory2[i][j]; diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index 6969514f..0c1887aa 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -463,7 +463,7 @@ int run_test_with_two_queue( cl_mem external_mem_image1[5]; cl_mem external_mem_image2[5]; - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { external_mem_image1[i] = externalMemory1[i] @@ -631,7 +631,8 @@ int run_test_with_two_queue( err |= clSetKernelArg(updateKernelCQ2, 7, sizeof(unsigned int), &numMipLevels); - for (int i = 0; i < num2DImages - 1; i++, ++j) + for (uint32_t i = 0; i < num2DImages - 1; + i++, ++j) { err = clSetKernelArg( updateKernelCQ1, j, sizeof(cl_mem), @@ -732,7 +733,7 @@ int run_test_with_two_queue( } clFinish(cmd_queue2); - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { err = clEnqueueReadImage( cmd_queue1, external_mem_image2[i], CL_TRUE, @@ -772,7 +773,7 @@ int run_test_with_two_queue( break; } } - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { delete vkImage2DListDeviceMemory1[i]; delete vkImage2DListDeviceMemory2[i]; @@ -1083,7 +1084,7 @@ int run_test_with_one_queue( cl_mem external_mem_image1[4]; cl_mem external_mem_image2[4]; - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { external_mem_image1[i] = externalMemory1[i] @@ -1218,7 +1219,7 @@ int run_test_with_one_queue( break; } int j = 0; - for (int i = 0; i < num2DImages; i++, ++j) + for (uint32_t i = 0; i < num2DImages; i++, ++j) { err = clSetKernelArg( updateKernelCQ1, j, sizeof(cl_mem), @@ -1284,7 +1285,7 @@ int run_test_with_one_queue( "Failed to signal CL semaphore\n"); } - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < 
num2DImages; i++) { err = clEnqueueReadImage( cmd_queue1, external_mem_image2[i], CL_TRUE, @@ -1324,7 +1325,7 @@ int run_test_with_one_queue( break; } } - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { delete vkImage2DListDeviceMemory1[i]; delete vkImage2DListDeviceMemory2[i]; @@ -1423,7 +1424,7 @@ struct ImageCommonTest : public VulkanTestBase log_info("clCreateCommandQueue2 successful \n"); - for (int i = 0; i < num_kernels; i++) + for (uint32_t i = 0; i < num_kernels; i++) { switch (i) { @@ -1474,7 +1475,7 @@ struct ImageCommonTest : public VulkanTestBase } const char *sourceTexts[num_kernel_types] = { source_1, source_2, source_3 }; - for (int k = 0; k < num_kernel_types; k++) + for (uint32_t k = 0; k < num_kernel_types; k++) { program_source_length = strlen(sourceTexts[k]); program[k] = clCreateProgramWithSource( From e15c6eb760dc8167303c8dba986707b3b84605f7 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 6 Aug 2025 00:08:04 +0800 Subject: [PATCH 19/61] Fix 'fpclassify: ambiguous call' compile fail in MSVC 2022 (#2426) Similar to #2219, we see "'fpclassify': ambiguous call" error in test_conformance\basic\test_fpmath.cpp due to missing constexpr at https://github.com/KhronosGroup/OpenCL-CTS/blob/9265cbb2c274/test_conformance/basic/test_fpmath.cpp#L104 This PR fixes the issue by moving the utility function isnan_fp into the new harness header mathHelpers.h and using it. Note this PR doesn't modify use of isnan in many tests where only float/double values are checked.
--- test_common/harness/mathHelpers.h | 35 +++++++++++++++ test_conformance/basic/test_explicit_s2v.cpp | 37 ++++++---------- test_conformance/basic/test_fpmath.cpp | 20 +-------- .../conversions/basic_test_conversions.cpp | 19 +------- .../binary_two_results_i_half.cpp | 4 +- .../math_brute_force/ternary_half.cpp | 44 +++++++++---------- .../unary_two_results_half.cpp | 14 +++--- test_conformance/math_brute_force/utility.h | 11 +---- .../relationals/test_comparisons_fp.cpp | 6 +-- test_conformance/select/util_select.cpp | 8 ++-- test_conformance/spirv_new/testBase.h | 1 + test_conformance/spirv_new/test_decorate.cpp | 2 +- 12 files changed, 94 insertions(+), 107 deletions(-) create mode 100644 test_common/harness/mathHelpers.h diff --git a/test_common/harness/mathHelpers.h b/test_common/harness/mathHelpers.h new file mode 100644 index 00000000..cdbf2bfe --- /dev/null +++ b/test_common/harness/mathHelpers.h @@ -0,0 +1,35 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _mathHelpers_h +#define _mathHelpers_h + +#if defined(__APPLE__) +#include +#else +#include +#endif +#include + +template inline bool isnan_fp(const T &v) { return std::isnan(v); } + +template <> inline bool isnan_fp(const cl_half &v) +{ + uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((cl_half)v) & 0x3FF; + return (h_exp == 0x1F && h_mant != 0); +} + +#endif // _mathHelpers_h diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp index a5ae452f..418e184a 100644 --- a/test_conformance/basic/test_explicit_s2v.cpp +++ b/test_conformance/basic/test_explicit_s2v.cpp @@ -14,7 +14,6 @@ // limitations under the License. // #include -using std::isnan; #include "harness/compat.h" #include @@ -26,6 +25,7 @@ using std::isnan; #include #include "harness/conversions.h" +#include "harness/mathHelpers.h" #include "harness/typeWrappers.h" extern cl_half_rounding_mode halfRoundingMode; @@ -102,16 +102,6 @@ const char * kernel_explicit_s2v_set[NUM_VEC_TYPES][NUM_VEC_TYPES][5] = { // clang-format on -bool IsHalfNaN(cl_half v) -{ - // Extract FP16 exponent and mantissa - uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = ((cl_half)v) & 0x3FF; - - // NaN test - return (h_exp == 0x1F && h_mant != 0); -} - static int test_explicit_s2v_function(cl_context context, cl_command_queue queue, cl_kernel kernel, ExplicitType srcType, unsigned int count, @@ -183,20 +173,21 @@ static int test_explicit_s2v_function(cl_context context, { bool isSrcNaN = (((srcType == kHalf) - && IsHalfNaN(*reinterpret_cast(inPtr))) + && isnan_fp(*reinterpret_cast(inPtr))) || ((srcType == kFloat) - && isnan(*reinterpret_cast(inPtr))) + && isnan_fp(*reinterpret_cast(inPtr))) || ((srcType == kDouble) - && isnan(*reinterpret_cast(inPtr)))); - bool isDestNaN = (((destType == kHalf) - && IsHalfNaN(*reinterpret_cast( - outPtr + destTypeSize * s))) - || ((destType == kFloat) - && 
isnan(*reinterpret_cast( - outPtr + destTypeSize * s))) - || ((destType == kDouble) - && isnan(*reinterpret_cast( - outPtr + destTypeSize * s)))); + && isnan_fp(*reinterpret_cast(inPtr)))); + bool isDestNaN = + (((destType == kHalf) + && isnan_fp(*reinterpret_cast( + outPtr + destTypeSize * s))) + || ((destType == kFloat) + && isnan_fp(*reinterpret_cast( + outPtr + destTypeSize * s))) + || ((destType == kDouble) + && isnan_fp(*reinterpret_cast( + outPtr + destTypeSize * s)))); if (isSrcNaN && isDestNaN) { diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp index c39a2fec..f8f39ae0 100644 --- a/test_conformance/basic/test_fpmath.cpp +++ b/test_conformance/basic/test_fpmath.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/mathHelpers.h" #include "harness/rounding_mode.h" #include "harness/stringHelpers.h" @@ -57,16 +58,6 @@ template double toDouble(T val) return val; } -bool isHalfNan(cl_half v) -{ - // Extract FP16 exponent and mantissa - uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = v & 0x3FF; - - // NaN test - return (h_exp == 0x1F && h_mant != 0); -} - cl_half half_plus(cl_half a, cl_half b) { return HFF(std::plus()(HTF(a), HTF(b))); @@ -101,14 +92,7 @@ int verify_fp(std::vector (&input)[2], std::vector &output, T r = test.ref(inA[i], inB[i]); bool both_nan = false; - if (std::is_same::value) - { - both_nan = isHalfNan(r) && isHalfNan(output[i]); - } - else if (std::is_floating_point::value) - { - both_nan = std::isnan(r) && std::isnan(output[i]); - } + both_nan = isnan_fp(r) && isnan_fp(output[i]); // If not both nan, check if the result is the same if (!both_nan && (r != output[i])) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 59d41e55..d4f6d366 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ 
b/test_conformance/conversions/basic_test_conversions.cpp @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // +#include "harness/mathHelpers.h" #include "harness/testHarness.h" #include "harness/compat.h" #include "harness/ThreadPool.h" @@ -955,24 +956,6 @@ void MapResultValuesComplete(const std::unique_ptr &info) // destroyed automatically soon after we exit. } -template static bool isnan_fp(const T &v) -{ - if (std::is_same::value) - { - uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = ((cl_half)v) & 0x3FF; - return (h_exp == 0x1F && h_mant != 0); - } - else - { -#if !defined(_WIN32) - return std::isnan(v); -#else - return _isnan(v); -#endif - } -} - template void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input) { diff --git a/test_conformance/math_brute_force/binary_two_results_i_half.cpp b/test_conformance/math_brute_force/binary_two_results_i_half.cpp index a2379431..0b48e33e 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_half.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_half.cpp @@ -260,7 +260,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode) if (t[j] == q[j] && t2[j] == q2[j]) continue; // Check for paired NaNs - if (IsHalfNaN(t[j]) && IsHalfNaN(q[j]) && t2[j] == q2[j]) + if (isnan_fp(t[j]) && isnan_fp(q[j]) && t2[j] == q2[j]) continue; cl_half test = ((cl_half *)q)[j]; @@ -282,7 +282,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode) // then the standard either neglects to say what is returned // in iptr or leaves it undefined or implementation defined. 
int iptrUndefined = IsHalfInfinity(p[j]) || (HTF(p2[j]) == 0.0f) - || IsHalfNaN(p2[j]) || IsHalfNaN(p[j]); + || isnan_fp(p2[j]) || isnan_fp(p[j]); if (iptrUndefined) iErr = 0; int fail = !(fabsf(err) <= half_ulps && iErr == 0); diff --git a/test_conformance/math_brute_force/ternary_half.cpp b/test_conformance/math_brute_force/ternary_half.cpp index 843ceaa0..08c39900 100644 --- a/test_conformance/math_brute_force/ternary_half.cpp +++ b/test_conformance/math_brute_force/ternary_half.cpp @@ -274,10 +274,10 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) if (skipNanInf) { if (overflow[j] || IsHalfInfinity(correct) - || IsHalfNaN(correct) || IsHalfInfinity(hp0[j]) - || IsHalfNaN(hp0[j]) || IsHalfInfinity(hp1[j]) - || IsHalfNaN(hp1[j]) || IsHalfInfinity(hp2[j]) - || IsHalfNaN(hp2[j])) + || isnan_fp(correct) || IsHalfInfinity(hp0[j]) + || isnan_fp(hp0[j]) || IsHalfInfinity(hp1[j]) + || isnan_fp(hp1[j]) || IsHalfInfinity(hp2[j]) + || isnan_fp(hp2[j])) continue; } @@ -318,9 +318,9 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3)) + || isnan_fp(correct3)) continue; } @@ -381,13 +381,13 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3) + || isnan_fp(correct3) || IsHalfInfinity(correct4) - || IsHalfNaN(correct4) + || isnan_fp(correct4) || IsHalfInfinity(correct5) - || IsHalfNaN(correct5)) + || isnan_fp(correct5)) continue; } @@ -474,13 +474,13 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. 
Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3) + || isnan_fp(correct3) || IsHalfInfinity(correct4) - || IsHalfNaN(correct4) + || isnan_fp(correct4) || IsHalfInfinity(correct5) - || IsHalfNaN(correct5)) + || isnan_fp(correct5)) continue; } @@ -551,9 +551,9 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3)) + || isnan_fp(correct3)) continue; } @@ -613,13 +613,13 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3) + || isnan_fp(correct3) || IsHalfInfinity(correct4) - || IsHalfNaN(correct4) + || isnan_fp(correct4) || IsHalfInfinity(correct5) - || IsHalfNaN(correct5)) + || isnan_fp(correct5)) continue; } @@ -689,9 +689,9 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. 
if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3)) + || isnan_fp(correct3)) continue; } diff --git a/test_conformance/math_brute_force/unary_two_results_half.cpp b/test_conformance/math_brute_force/unary_two_results_half.cpp index 683e1492..63398028 100644 --- a/test_conformance/math_brute_force/unary_two_results_half.cpp +++ b/test_conformance/math_brute_force/unary_two_results_half.cpp @@ -249,9 +249,9 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) if (skipNanInf && overflow[j]) continue; // Note: no double rounding here. Reference functions // calculate in single precision. - if (IsHalfInfinity(correct1) || IsHalfNaN(correct1) - || IsHalfInfinity(correct2) || IsHalfNaN(correct2) - || IsHalfInfinity(pIn[j]) || IsHalfNaN(pIn[j])) + if (IsHalfInfinity(correct1) || isnan_fp(correct1) + || IsHalfInfinity(correct2) || isnan_fp(correct2) + || IsHalfInfinity(pIn[j]) || isnan_fp(pIn[j])) continue; } @@ -320,13 +320,13 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. 
if (IsHalfInfinity(correctp) - || IsHalfNaN(correctp) + || isnan_fp(correctp) || IsHalfInfinity(correctn) - || IsHalfNaN(correctn) + || isnan_fp(correctn) || IsHalfInfinity(correct2p) - || IsHalfNaN(correct2p) + || isnan_fp(correct2p) || IsHalfInfinity(correct2n) - || IsHalfNaN(correct2n)) + || isnan_fp(correct2n)) continue; } diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index a43f3a64..f5a30f86 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -19,6 +19,7 @@ #include "harness/compat.h" #include "harness/rounding_mode.h" #include "harness/fpcontrol.h" +#include "harness/mathHelpers.h" #include "harness/testHarness.h" #include "harness/ThreadPool.h" #include "harness/conversions.h" @@ -172,16 +173,6 @@ inline int IsFloatNaN(double x) return ((u.u & 0x7fffffffU) > 0x7F800000U); } -inline bool IsHalfNaN(const cl_half v) -{ - // Extract FP16 exponent and mantissa - uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = ((cl_half)v) & 0x3FF; - - // NaN test - return (h_exp == 0x1F && h_mant != 0); -} - inline bool IsHalfInfinity(const cl_half v) { // Extract FP16 exponent and mantissa diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index 66ab0729..79de562a 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -22,6 +22,7 @@ #include #include +#include "harness/mathHelpers.h" #include "harness/stringHelpers.h" #include @@ -368,9 +369,8 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, { if (gInfNanSupport == 0) { - float a = inDataA[i * vecSize + j]; - float b = inDataB[i * vecSize + j]; - if (isnan(a) || isnan(b)) + if (isnan_fp(inDataA[i * vecSize + j]) + || isnan_fp(inDataB[i * vecSize + j])) fail = 0; else fail = 1; diff --git 
a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index a685b7f6..71653a87 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -14,6 +14,8 @@ // limitations under the License. // #include "harness/errorHelpers.h" +#include "harness/mathHelpers.h" +#include "harness/testHarness.h" #include #include @@ -834,9 +836,9 @@ size_t check_half(const void *const test, const void *const correct, // Allow nans to be binary different for (i = 0; i < count; i++) { - float fcorrect = cl_half_to_float(c[i]); - float ftest = cl_half_to_float(t[i]); - if ((t[i] != c[i]) && !(isnan(fcorrect) && isnan(ftest))) + if ((t[i] != c[i]) + && !(isnan_fp(cl_half_to_float(c[i])) + && isnan_fp(cl_half_to_float(t[i])))) { log_error("\n(check_half) Error for vector size %zu found at " "0x%8.8zx (of 0x%8.8zx): " diff --git a/test_conformance/spirv_new/testBase.h b/test_conformance/spirv_new/testBase.h index 54fe15bd..5ea415a9 100644 --- a/test_conformance/spirv_new/testBase.h +++ b/test_conformance/spirv_new/testBase.h @@ -20,6 +20,7 @@ #define _testBase_h #include "harness/compat.h" +#include "harness/mathHelpers.h" #include "harness/rounding_mode.h" #include diff --git a/test_conformance/spirv_new/test_decorate.cpp b/test_conformance/spirv_new/test_decorate.cpp index fc9fc522..f9380611 100644 --- a/test_conformance/spirv_new/test_decorate.cpp +++ b/test_conformance/spirv_new/test_decorate.cpp @@ -231,7 +231,7 @@ static inline f = cl_half_to_float(cl_half_from_float(f, half_rounding)); To val = static_cast(std::min(std::max(f, loVal), hiVal)); - if (isnan(cl_half_to_float(rhs))) + if (isnan_fp(rhs)) { val = 0; } From c7abbe57a4311150724d88b90d2ffc176633d60b Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 5 Aug 2025 18:09:16 +0200 Subject: [PATCH 20/61] Added test to verify negative result of clSetKernelArg with CL_INVALID_ARG_VALUE (#2445) Related to #2282, according to work plan with mismatched read/write 
only qualifier for image_t objects --- test_conformance/api/test_kernels.cpp | 68 +++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index 30452caa..a8d02558 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -87,6 +87,19 @@ const char *sample_two_kernel_program[] = { "\n" "}\n" }; +const char *sample_read_only_image_test_kernel = R"( + __kernel void read_only_image_test(__write_only image2d_t img, __global uint4 *src) + { + write_imageui(img, (int2)(get_global_id(0), get_global_id(1)), src[0]); + } +)"; + +const char *sample_write_only_image_test_kernel = R"( + __kernel void write_only_image_test(__read_only image2d_t src, __global uint4 *dst) + { + dst[0]=read_imageui(src, (int2)(get_global_id(0), get_global_id(1))); + } +)"; REGISTER_TEST(get_kernel_info) { @@ -704,3 +717,58 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) return TEST_PASS; } + +REGISTER_TEST(negative_set_read_write_image_arg) +{ + cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper write_image_kernel, read_image_kernel; + clMemWrapper write_only_image, read_only_image; + const char *test_kernels[2] = { sample_read_only_image_test_kernel, + sample_write_only_image_test_kernel }; + constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 }; + const int size_dim = 128; + + // Setup the test + error = create_single_kernel_helper(context, &program, nullptr, 2, + test_kernels, nullptr); + test_error(error, "Unable to build test program"); + + read_image_kernel = clCreateKernel(program, "read_only_image_test", &error); + test_error(error, + "Unable to get read_only_image_test kernel for built program"); + + write_image_kernel = + clCreateKernel(program, "write_only_image_test", &error); + test_error(error, + "Unable to get write_only_image_test kernel for built program"); + + read_only_image = 
create_image_2d(context, CL_MEM_READ_ONLY, &format, + size_dim, size_dim, 0, nullptr, &error); + test_error(error, "create_image_2d failed"); + + write_only_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, + size_dim, size_dim, 0, nullptr, &error); + test_error(error, "create_image_2d failed"); + + // Run the test + error = clSetKernelArg(read_image_kernel, 0, sizeof(read_only_image), + &read_only_image); + test_failure_error_ret(error, CL_INVALID_ARG_VALUE, + "clSetKernelArg is supposed to fail " + "with CL_INVALID_ARG_VALUE when an image is " + "created with CL_MEM_READ_ONLY is " + "passed to a write_only kernel argument", + TEST_FAIL); + + error = clSetKernelArg(write_image_kernel, 0, sizeof(write_only_image), + &write_only_image); + test_failure_error_ret(error, CL_INVALID_ARG_VALUE, + "clSetKernelArg is supposed to fail " + "with CL_INVALID_ARG_VALUE when an image is " + "created with CL_MEM_WRITE_ONLY is " + "passed to a read_only kernel argument", + TEST_FAIL); + + return TEST_PASS; +} From 678ad78233081bb6425e74fe744f825c61bb879c Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 5 Aug 2025 18:10:00 +0200 Subject: [PATCH 21/61] Remove semaphores_simple_2 and external_semaphores_simple_2 tests (#2447) While the cl_khr_semaphore extension spec does state that there are no implicit dependencies between already enqueued commands and the clEnqueueSignalSemaphoresKHR, it's nothing special as this is already true for any other event that's not a barrier or marker. Also, the CTS can't reliably assume implementations to reorder events even in an out of order queue as this is highly implementation defined behavior and implementations may or may not choose to reorder events in a specific order. I don't see a reason why this should be tested for semaphores, but not for any other commands, especially as it imposes a restriction on how to implement out of order queues that wasn't enforced before.
Closes: https://github.com/KhronosGroup/OpenCL-CTS/issues/2439 --- .../test_external_semaphore.cpp | 103 ------------------ .../cl_khr_semaphore/test_semaphores.cpp | 89 --------------- 2 files changed, 192 deletions(-) diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp index 7611fbf3..198bf046 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp @@ -388,109 +388,6 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2)) return TEST_PASS; } -// Confirm that signal a semaphore with no event dependencies will not result -// in an implicit dependency on everything previously submitted -REGISTER_TEST_VERSION(external_semaphores_simple_2, Version(1, 2)) -{ - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - - if (init_vulkan_device(1, &device)) - { - log_info("Cannot initialise Vulkan. 
" - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - std::vector - vkExternalSemaphoreHandleTypeList = - getSupportedInteropExternalSemaphoreHandleTypes(device, vkDevice); - - if (vkExternalSemaphoreHandleTypeList.empty()) - { - test_fail("No external semaphore handle types found\n"); - } - - for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType : - vkExternalSemaphoreHandleTypeList) - { - log_info_semaphore_type(vkExternalSemaphoreHandleType); - VulkanSemaphore vkVk2CLSemaphore(vkDevice, - vkExternalSemaphoreHandleType); - - auto sema_ext = clExternalImportableSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, device); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Create Kernel - clProgramWrapper program; - clKernelWrapper kernel; - err = create_single_kernel_helper(context, &program, &kernel, 1, - &source, "empty"); - test_error(err, "Could not create kernel"); - - // Enqueue task_1 (dependency on user_event) - clEventWrapper task_1_event; - err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event); - test_error(err, "Could not enqueue task 1"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &wait_event); - 
test_error(err, "Could not wait semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - - cl_event event_list[] = { signal_event, wait_event }; - err = clWaitForEvents(2, event_list); - test_error(err, "Could not wait on events"); - - // Ensure all events are completed except for task_1 - test_assert_event_inprogress(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - } - - return TEST_PASS; -} - // Confirm that a semaphore can be reused multiple times REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2)) { diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp index e3351cd8..ce146b41 100644 --- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp +++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp @@ -76,87 +76,6 @@ struct SimpleSemaphore1 : public SemaphoreTestBase } }; -struct SimpleSemaphore2 : public SemaphoreTestBase -{ - SimpleSemaphore2(cl_device_id device, cl_context context, - cl_command_queue queue, cl_int nelems) - : SemaphoreTestBase(device, context, queue, nelems) - {} - - cl_int Run() override - { - cl_int err = CL_SUCCESS; - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - 
static_cast( - CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - semaphore = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Create Kernel - clProgramWrapper program; - clKernelWrapper kernel; - err = create_single_kernel_helper(context, &program, &kernel, 1, - &source, "empty"); - test_error(err, "Could not create kernel"); - - // Enqueue task_1 (dependency on user_event) - clEventWrapper task_1_event; - err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event); - test_error(err, "Could not enqueue task 1"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0, - nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0, - nullptr, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are completed except for task_1 - test_assert_event_inprogress(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - return CL_SUCCESS; - } -}; - struct SemaphoreReuse : public SemaphoreTestBase { SemaphoreReuse(cl_device_id device, cl_context 
context, @@ -387,14 +306,6 @@ REGISTER_TEST_VERSION(semaphores_simple_1, Version(1, 2)) num_elements); } -// Confirm that signal a semaphore with no event dependencies will not result -// in an implicit dependency on everything previously submitted -REGISTER_TEST_VERSION(semaphores_simple_2, Version(1, 2)) -{ - return MakeAndRunTest(device, context, queue, - num_elements); -} - // Confirm that a semaphore can be reused multiple times REGISTER_TEST_VERSION(semaphores_reuse, Version(1, 2)) { From da1008c5dad5b9cdba4911b74f46265b4a051664 Mon Sep 17 00:00:00 2001 From: Yilong Guo Date: Wed, 6 Aug 2025 00:11:07 +0800 Subject: [PATCH 22/61] Fix integer overflow for work-group size calculation (#2455) --- test_conformance/basic/test_work_item_functions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/basic/test_work_item_functions.cpp index 046640b3..6098374c 100644 --- a/test_conformance/basic/test_work_item_functions.cpp +++ b/test_conformance/basic/test_work_item_functions.cpp @@ -476,7 +476,7 @@ struct TestWorkItemFnsOutOfRange maxWorkItemSizes[2] }; // check if maximum work group size for current dimention is not // exceeded - cl_uint work_group_size = max_workgroup_size + 1; + size_t work_group_size = max_workgroup_size + 1; while (max_workgroup_size < work_group_size && work_group_size != 1) { work_group_size = 1; @@ -492,9 +492,9 @@ struct TestWorkItemFnsOutOfRange // compute max number of work groups based on buffer size and max // group size - cl_uint max_work_groups = testData.size() / work_group_size; + size_t max_work_groups = testData.size() / work_group_size; // take into account number of dimentions - cl_uint work_groups_per_dim = + size_t work_groups_per_dim = std::max(1, (int)pow(max_work_groups, 1.f / dim)); for (size_t j = 0; j < dim; j++) From dacb944cf30d8033f639632eefa4413d48591ce9 Mon Sep 17 00:00:00 2001 From: Yilong Guo Date: Wed, 6 Aug 2025
00:12:25 +0800 Subject: [PATCH 23/61] Retain output memory objects for simultaneous command buffer tests (#2429) Memory objects created in `EnqueueSimultaneousPass()` are used by kernels that don't execute until the user event is signaled. Without retaining these objects, they would be destroyed before the deferred kernel execution occurs. --- .../mutable_command_simultaneous.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp index b4945e77..c30b6c61 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp @@ -337,6 +337,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest * buffer_size_multiplier, nullptr, &error); test_error(error, "clCreateBuffer failed"); + // Retain new output memory object until the end of the test. 
+ retained_output_buffers.push_back(new_out_mem); cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), &new_out_mem }; @@ -429,6 +431,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest clKernelWrapper kernel_fill; clProgramWrapper program_fill; + std::vector retained_output_buffers; + const size_t test_global_work_size = 3 * sizeof(cl_int); const cl_int pattern_pri = 42; From e7e753f1a9c43538dbf6eaac522a76a9165ba693 Mon Sep 17 00:00:00 2001 From: Yilong Guo Date: Wed, 6 Aug 2025 00:13:05 +0800 Subject: [PATCH 24/61] Fix data race in mutable command buffer simultaneous execution test (#2434) Prior to this change, both `clEnqueueReadBuffer` calls before and after updating the command buffer were writing to the same `output_buffer`, causing a data race condition and the first call's result to be overwritten. This commit introduces separate destination vectors (`output_buffer` and `updated_output_buffer`) for these operations and verifies both results independently to ensure test integrity. 
--- .../mutable_command_simultaneous.cpp | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp index c30b6c61..4b1610f5 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp @@ -297,6 +297,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest { cl_int offset; std::vector output_buffer; + std::vector updated_output_buffer; // 0:user event, 1:offset-buffer fill event, 2:kernel done event clEventWrapper wait_events[3]; }; @@ -375,7 +376,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(), - pd.output_buffer.data(), 1, + pd.updated_output_buffer.data(), 1, &pd.wait_events[2], nullptr); test_error(error, "clEnqueueReadBuffer failed"); @@ -390,8 +391,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest cl_int offset = static_cast(num_elements); std::vector simul_passes = { - { 0, std::vector(num_elements) }, - { offset, std::vector(num_elements) } + { 0, std::vector(num_elements), + std::vector(num_elements) }, + { offset, std::vector(num_elements), + std::vector(num_elements) } }; for (auto&& pass : simul_passes) @@ -409,13 +412,26 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest test_error(error, "clFinish failed"); // verify the result buffers - for (auto&& pass : simul_passes) + auto& first_pass_output = simul_passes[0].output_buffer; + auto& 
first_pass_updated_output = simul_passes[0].updated_output_buffer; + auto& second_pass_output = simul_passes[1].output_buffer; + auto& second_pass_updated_output = + simul_passes[1].updated_output_buffer; + for (size_t i = 0; i < num_elements; i++) { - auto& res_data = pass.output_buffer; - for (size_t i = 0; i < num_elements; i++) - { - CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i); - } + // First pass: + // Before updating, out_mem is copied from in_mem (pattern_pri) + CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i); + // After updating, new_out_mem is copied from in_mem (pattern_pri) + CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i], + i); + // Second pass: + // Before updating, out_mem is filled with overwritten_pattern + CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i], + i); + // After updating, new_out_mem is copied from in_mem (pattern_pri) + CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i], + i); } return CL_SUCCESS; From 466049474c0f457106100d5e8be04304271b54e7 Mon Sep 17 00:00:00 2001 From: Yilong Guo Date: Wed, 6 Aug 2025 00:14:35 +0800 Subject: [PATCH 25/61] Fix buffer size for mutable dispatch command buffer test (#2437) --- .../mutable_command_global_size.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp index 8fb6b643..3cfc4db1 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp @@ -135,7 +135,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest for (size_t i = 0; i < 
num_elements; i++) if (i >= update_global_size && global_work_size != resultData[i]) { - log_error("Data failed to verify: update_global_size != " + log_error("Data failed to verify: global_work_size != " "resultData[%zu]=%d\n", i, resultData[i]); return TEST_FAIL; @@ -154,7 +154,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest size_t info_global_size = 0; const size_t update_global_size = 3; - const size_t sizeToAllocate = global_work_size; + const size_t sizeToAllocate = global_work_size * sizeof(cl_int); const size_t num_elements = sizeToAllocate / sizeof(cl_int); cl_mutable_command_khr command = nullptr; }; From 9809cc931ab8b94eebc5d717bb507982b9da6361 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 5 Aug 2025 17:15:04 +0100 Subject: [PATCH 26/61] Test for mutable-dispatch original work-dim (#2438) New test following on from OpenCL-Docs Issues discussion https://github.com/KhronosGroup/OpenCL-Docs/issues/1390#issuecomment-3023818903 Noting that we have no test coverage for using the original value of `work_dim` during command-buffer update. All of our current CTS testing uses `0` for the `work_dim` to signify no update from the original value, however this test explicitly uses the original value. 
--- .../CMakeLists.txt | 1 + .../mutable_command_work_dim.cpp | 220 ++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index 8fa84162..c848f733 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -14,6 +14,7 @@ set(${MODULE_NAME}_SOURCES mutable_command_multiple_dispatches.cpp mutable_command_iterative_arg_update.cpp mutable_command_work_groups.cpp + mutable_command_work_dim.cpp ../basic_command_buffer.cpp ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp new file mode 100644 index 00000000..43df9fbe --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp @@ -0,0 +1,220 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include "mutable_command_basic.h" + +#include +#include + +#include + +// Tests that a mutable-dispatch update which sets `work_dim` to the original +// 3D value behaves as expected. + +struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchWorkDim(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + cl_int SetUp(int elements) override + { + result_data.resize(update_elements); + return InfoMutableCommandBufferTest::SetUp(elements); + } + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && (mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR); + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + bool Verify(cl_mem buffer, cl_uint gid_elements) + { + std::memset(result_data.data(), 0, alloc_size); + cl_int error = + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, alloc_size, + result_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < gid_elements; i++) + { + if (result_data[i] != gid_elements) + { + log_error("Data failed to verify at index %zu. 
" + "Expected %u, result was %u\n", + i, gid_elements, result_data[i]); + return false; + } + } + return true; + } + + cl_int Run() override + { + const char *global_size_kernel = + R"( + __kernel void three_dim(__global uint *dst0, + __global uint *dst1, + __global uint *dst2) + { + size_t gid = get_global_linear_id(); + dst0[gid] = get_global_size(0); + dst1[gid] = get_global_size(1); + dst2[gid] = get_global_size(2); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &global_size_kernel, "three_dim"); + test_error(error, "Creating kernel failed"); + + // Create a buffer for each of the three dimensions to write the + // global size into. + clMemWrapper stream1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + alloc_size, nullptr, &error); + test_error(error, "Creating test array failed"); + + clMemWrapper stream2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + alloc_size, nullptr, &error); + test_error(error, "Creating test array failed"); + + clMemWrapper stream3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + alloc_size, nullptr, &error); + test_error(error, "Creating test array failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream1); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream2); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &stream3); + test_error(error, "Unable to set indexed kernel arguments"); + + // Command-buffer contains a single kernel + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, work_dim, nullptr, + global_size_3D.data(), nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Enqueue command-buffer and wait on completion + error = 
clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + // Verify results before any update + if (!Verify(stream1, global_size_3D[0])) + { + return TEST_FAIL; + } + if (!Verify(stream2, global_size_3D[1])) + { + return TEST_FAIL; + } + if (!Verify(stream3, global_size_3D[2])) + { + return TEST_FAIL; + } + + // Update command with a mutable config where we use a different 3D + // global size, but hardcode `work_dim` to 3 (the original value). + cl_mutable_dispatch_config_khr dispatch_config{ + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + work_dim /* work_dim */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + update_global_size_3D.data() /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void *configs[1] = { &dispatch_config }; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // Enqueue updated command-buffer + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Verify update is reflected in buffer output. 
+ if (!Verify(stream1, update_global_size_3D[0])) + { + return TEST_FAIL; + } + if (!Verify(stream2, update_global_size_3D[1])) + { + return TEST_FAIL; + } + if (!Verify(stream3, update_global_size_3D[2])) + { + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + static const cl_uint work_dim = 3; + // 3D global size of kernel command when created + static const size_t original_elements = 2; + static constexpr std::array<size_t, 3> global_size_3D = { + original_elements, original_elements, original_elements + }; + // 3D global size to update kernel command to. + static const size_t update_elements = 4; + static constexpr std::array<size_t, 3> update_global_size_3D = { + update_elements, update_elements, update_elements + }; + // Size in bytes of each of the 3 cl_mem buffers + static const size_t alloc_size = update_elements * sizeof(cl_uint); + + cl_mutable_command_khr command = nullptr; + std::vector<cl_uint> result_data; +}; + +// get_global_linear_id() used in kernel is an OpenCL 2.0 API +REGISTER_TEST_VERSION(mutable_dispatch_work_dim, Version(2, 0)) +{ + return MakeAndRunTest<MutableDispatchWorkDim>(device, context, queue, + num_elements); +} From 4c54ecbd1fa8c5065655482e1cd4d0dc1b1605fb Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 5 Aug 2025 17:19:37 +0100 Subject: [PATCH 27/61] Fix printf vector for RTZ devices (#2459) The failure is : 7)testing printf("%.1v4hla",(0.25f,0.5f,1.f,1.5f)) verifyOutputBuffer failed with kernel: __kernel void test14(void) { float4 tmp = (float4)(0.25f,0.5f,1.f,1.5f); printf("%.1v4hla\n",tmp);} expected: 0x1p-2,0x1p-1,0x1p+0,0x1.8p+0 got: 0x1.0p-2,0x1.0p-1,0x1.0p+0,0x1.8p+0 %.1 requires 1 decimal. 
--- test_conformance/printf/util_printf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp index f982f15e..83a21fbb 100644 --- a/test_conformance/printf/util_printf.cpp +++ b/test_conformance/printf/util_printf.cpp @@ -1352,7 +1352,7 @@ std::vector correctBufferVectorRTZ = { "1.23e+03,9.87e+05,4.99e-04", - "0x1p-2,0x1p-1,0x1p+0,0x1.8p+0", + "0x1.0p-2,0x1.0p-1,0x1.0p+0,0x1.8p+0", "1,2,3,4,1.5,3.13999,2.5,3.5", From 086a6c67fb901790b23e8d9c5c4f42ec93b2b81e Mon Sep 17 00:00:00 2001 From: dcrawleyqti Date: Tue, 5 Aug 2025 12:32:02 -0400 Subject: [PATCH 28/61] cl_khr_external_memory_ahb test (#2413) Android AHardwareBuffer test. Initial testing for buffers and image. --------- Co-authored-by: joselope --- test_conformance/extensions/CMakeLists.txt | 3 + .../cl_khr_external_memory_ahb/CMakeLists.txt | 12 + .../cl_khr_external_memory_ahb/debug_ahb.cpp | 190 ++ .../cl_khr_external_memory_ahb/debug_ahb.h | 42 + .../cl_khr_external_memory_ahb/main.cpp | 23 + .../cl_khr_external_memory_ahb/test_ahb.cpp | 1911 +++++++++++++++++ .../test_ahb_negative.cpp | 246 +++ 7 files changed, 2427 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_external_memory_ahb/CMakeLists.txt create mode 100644 test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp create mode 100644 test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.h create mode 100644 test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp create mode 100644 test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp create mode 100644 test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt index aa57990b..2fee828a 100644 --- a/test_conformance/extensions/CMakeLists.txt +++ b/test_conformance/extensions/CMakeLists.txt @@ -5,6 +5,9 @@ add_subdirectory( 
cl_ext_cxx_for_opencl ) add_subdirectory( cl_khr_command_buffer ) add_subdirectory( cl_khr_dx9_media_sharing ) +if(ANDROID_PLATFORM GREATER 28) + add_subdirectory( cl_khr_external_memory_ahb ) +endif () add_subdirectory( cl_khr_external_memory_dma_buf ) add_subdirectory( cl_khr_semaphore ) add_subdirectory( cl_khr_kernel_clock ) diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_memory_ahb/CMakeLists.txt new file mode 100644 index 00000000..54ace3cf --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CL_KHR_EXTERNAL_MEMORY_AHB) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_ahb.cpp + test_ahb_negative.cpp + debug_ahb.cpp +) + +link_libraries(OpenCL nativewindow) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp new file mode 100644 index 00000000..3964c0f6 --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp @@ -0,0 +1,190 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "debug_ahb.h" + +constexpr AHardwareBuffer_UsageFlags flag_list[] = { + AHARDWAREBUFFER_USAGE_CPU_READ_RARELY, + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, + AHARDWAREBUFFER_USAGE_CPU_WRITE_NEVER, + AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, + AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK, + AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE, + AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER, + AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY, + AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT, + AHARDWAREBUFFER_USAGE_VIDEO_ENCODE, + AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA, + AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER, + AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP, + AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE, + AHARDWAREBUFFER_USAGE_FRONT_BUFFER, +}; + +std::string +ahardwareBufferDecodeUsageFlagsToString(const AHardwareBuffer_UsageFlags flags) +{ + if (flags == 0) + { + return "UNKNOWN FLAG"; + } + + std::vector active_flags; + for (const auto flag : flag_list) + { + if (flag & flags) + { + active_flags.push_back(ahardwareBufferUsageFlagToString(flag)); + } + } + + if (active_flags.empty()) + { + return "UNKNOWN FLAG"; + } + + return std::accumulate(active_flags.begin() + 1, active_flags.end(), + active_flags.front(), + [](std::string acc, const std::string& flag) { + return std::move(acc) + "|" + flag; + }); +} + +std::string +ahardwareBufferUsageFlagToString(const AHardwareBuffer_UsageFlags flag) +{ + std::string result; + switch (flag) + { + case AHARDWAREBUFFER_USAGE_CPU_READ_NEVER: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_NEVER"; + break; + case AHARDWAREBUFFER_USAGE_CPU_READ_RARELY: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_RARELY"; + break; + case AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN"; + break; + case AHARDWAREBUFFER_USAGE_CPU_READ_MASK: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_MASK"; + break; + case AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY: + result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY"; + break; + case 
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN: + result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN"; + break; + case AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK: + result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK"; + break; + case AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE: + result = "AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE"; + break; + case AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER: + result = "AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER"; + break; + case AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY: + result = "AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY"; + break; + case AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT: + result = "AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT"; + break; + case AHARDWAREBUFFER_USAGE_VIDEO_ENCODE: + result = "AHARDWAREBUFFER_USAGE_VIDEO_ENCODE"; + break; + case AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA: + result = "AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA"; + break; + case AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER: + result = "AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER"; + break; + case AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP: + result = "AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP"; + break; + case AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE: + result = "AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE"; + break; + default: result = "Unknown flag"; + } + return result; +} + +std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format) +{ + std::string result; + switch (format) + { + case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT: + result = "AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT"; + break; + case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM: + result = 
"AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_BLOB: + result = "AHARDWAREBUFFER_FORMAT_BLOB"; + break; + case AHARDWAREBUFFER_FORMAT_D16_UNORM: + result = "AHARDWAREBUFFER_FORMAT_D16_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_D24_UNORM: + result = "AHARDWAREBUFFER_FORMAT_D24_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT: + result = "AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_D32_FLOAT: + result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT"; + break; + case AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT: + result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_S8_UINT: + result = "AHARDWAREBUFFER_FORMAT_S8_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420: + result = "AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420"; + break; + case AHARDWAREBUFFER_FORMAT_YCbCr_P010: + result = "AHARDWAREBUFFER_FORMAT_YCbCr_P010"; + break; + case AHARDWAREBUFFER_FORMAT_R8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R16_UINT: + result = "AHARDWAREBUFFER_FORMAT_R16_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_R16G16_UINT: + result = "AHARDWAREBUFFER_FORMAT_R16G16_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM"; + break; + } + return result; +} \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.h b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.h new file mode 100644 index 00000000..037bc6c2 --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#pragma once + +#include +#include +#include +#include + +#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \ + if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \ + { \ + const std::string usage_string = \ + ahardwareBufferDecodeUsageFlagsToString( \ + static_cast( \ + ahardwareBuffer_Desc.usage)); \ + log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, " \ + "%u, layers = %u)\n", \ + ahardwareBufferFormatToString(format.aHardwareBufferFormat) \ + .c_str(), \ + usage_string.c_str(), ahardwareBuffer_Desc.width, \ + ahardwareBuffer_Desc.height, ahardwareBuffer_Desc.layers); \ + continue; \ + } + +std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format); +std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag); +std::string +ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags); \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp new file mode 100644 index 00000000..8900e6ff --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp @@ -0,0 +1,23 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "harness/testHarness.h" + +int main(int argc, const char *argv[]) +{ + return runTestHarness(argc, argv, test_registry::getInstance().num_tests(), + test_registry::getInstance().definitions(), false, 0); +} \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp new file mode 100644 index 00000000..ada80027 --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp @@ -0,0 +1,1911 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "harness/compat.h" +#include "harness/kernelHelpers.h" +#include "harness/imageHelpers.h" +#include "harness/typeWrappers.h" +#include "harness/errorHelpers.h" +#include "harness/extensionHelpers.h" +#include +#include "debug_ahb.h" + +static bool isAHBUsageReadable(const AHardwareBuffer_UsageFlags usage) +{ + return (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE & usage) != 0; +} + +struct ahb_format_table +{ + AHardwareBuffer_Format aHardwareBufferFormat; + cl_image_format clImageFormat; + cl_mem_object_type clMemObjectType; +}; + +struct ahb_usage_table +{ + AHardwareBuffer_UsageFlags usageFlags; +}; + +struct ahb_image_size_table +{ + uint32_t width; + uint32_t height; +}; + +ahb_image_size_table test_sizes[] = { + { 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 } +}; + +ahb_usage_table test_usages[] = { + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER) }, + { static_cast( + AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }, + { static_cast( + AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER + | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }, +}; + +ahb_format_table test_formats[] = { + { AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT, + { CL_RGBA, CL_HALF_FLOAT }, + CL_MEM_OBJECT_IMAGE2D }, + { AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM, + { CL_RGBA, CL_UNORM_INT8 }, + CL_MEM_OBJECT_IMAGE2D }, + + { AHARDWAREBUFFER_FORMAT_R8_UNORM, + { CL_R, CL_UNORM_INT8 }, + CL_MEM_OBJECT_IMAGE2D }, +}; + +static const char *diff_images_kernel_source = { + R"( + #define PIXEL_FORMAT %s4 + __kernel void verify_image( read_only image2d_t ahb_image , read_only image2d_t ocl_image, global PIXEL_FORMAT *ocl_pixel, global PIXEL_FORMAT *ahb_pixel) + { + int tidX = get_global_id(0); + int tidY = get_global_id(1); + int idx = tidY * 
get_global_size(0) + tidX; + + sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + PIXEL_FORMAT a = read_image%s(ahb_image, sampler, (int2)( tidX, tidY ) ); + PIXEL_FORMAT o = read_image%s(ocl_image, sampler, (int2)( tidX, tidY ) ); + ahb_pixel[idx] = a; + ocl_pixel[idx] = o; + })" +}; + +// Checks that the inferred image format is correct +REGISTER_TEST(test_images) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + for (const auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + const cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem image = clCreateImageWithProperties( + context, props, CL_MEM_READ_WRITE, nullptr, nullptr, + nullptr, &err); + 
test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + cl_image_format imageFormat = { 0 }; + err = clGetImageInfo(image, CL_IMAGE_FORMAT, + sizeof(cl_image_format), &imageFormat, + nullptr); + test_error(err, "Failed to query image format"); + + if (imageFormat.image_channel_order + != format.clImageFormat.image_channel_order) + { + log_error("Expected channel order %d, got %d\n", + format.clImageFormat.image_channel_order, + imageFormat.image_channel_order); + return TEST_FAIL; + } + + if (imageFormat.image_channel_data_type + != format.clImageFormat.image_channel_data_type) + { + log_error("Expected image_channel_data_type %d, got %d\n", + format.clImageFormat.image_channel_data_type, + imageFormat.image_channel_data_type); + return TEST_FAIL; + } + + test_error(clReleaseMemObject(image), + "Failed to release image"); + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_images_read) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = hardware_buffer_desc.stride * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t 
srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + cl_image_desc imageDesc = { 0 }; + imageDesc.image_type = imageInfo.type; + imageDesc.image_width = imageInfo.width; + imageDesc.image_height = imageInfo.height; + imageDesc.image_row_pitch = imageInfo.rowPitch; + + clMemWrapper opencl_image = clCreateImage( + context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + imageInfo.format, &imageDesc, srcData, &err); + test_error(err, "Failed to create CL image"); + + ExplicitTypes outputType; + const char *readFormat; + + if (format.clImageFormat.image_channel_data_type + == CL_UNSIGNED_INT8) + { + readFormat = "ui"; + outputType = kUInt; + } + else + { + readFormat = "f"; + outputType = kFloat; + } + + size_t verify_buffer_size = imageInfo.width * imageInfo.height + * get_explicit_type_size(outputType) * 4; + + clMemWrapper ocl_pixel_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE, + verify_buffer_size, nullptr, &err); + test_error(err, "Failed to create ocl pixel buffer"); + + 
clMemWrapper ahb_pixel_buffer =
+                    clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                   verify_buffer_size, nullptr, &err);
+                test_error(err, "Failed to create ahb pixel buffer");
+
+                // Instantiate the kernel template. The buffer is twice the
+                // template length and snprintf is bounded by that size, so
+                // the formatted source can never overrun it.
+                std::vector<char> programSrc(
+                    2 * strlen(diff_images_kernel_source));
+                const char *outputTypeName = get_explicit_type_name(outputType);
+
+                snprintf(programSrc.data(), programSrc.size(),
+                         diff_images_kernel_source,
+                         outputTypeName, // Read image format 1
+                         readFormat, // Read image return type 1
+                         readFormat // Read image return type 2
+                );
+                const char *ptr = programSrc.data();
+                clProgramWrapper program;
+                clKernelWrapper kernel;
+                err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                                  &ptr, "verify_image");
+                // A failed build must stop the test here rather than surface
+                // later as a confusing clSetKernelArg error.
+                test_error(err, "Failed to create verify_image kernel");
+
+                // Set kernel args
+
+                err =
+                    clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_image);
+                test_error(err, "clSetKernelArg failed");
+
+                err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &opencl_image);
+                test_error(err, "clSetKernelArg failed");
+
+                err = clSetKernelArg(kernel, 2, sizeof(cl_mem),
+                                     &ocl_pixel_buffer);
+                test_error(err, "clSetKernelArg failed");
+
+                err = clSetKernelArg(kernel, 3, sizeof(cl_mem),
+                                     &ahb_pixel_buffer);
+                test_error(err, "clSetKernelArg failed");
+
+                // The imported AHB image must be acquired before the kernel
+                // reads it and released afterwards.
+                err = clEnqueueAcquireExternalMemObjectsKHR(
+                    queue, 1, &imported_image, 0, nullptr, nullptr);
+                test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed");
+
+                // One work-item per pixel
+                size_t global_work_size[] = { imageInfo.width,
+                                              imageInfo.height };
+                err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr,
+                                             global_work_size, nullptr, 0,
+                                             nullptr, nullptr);
+                test_error(err, "clEnqueueNDRangeKernel failed");
+
+                // Release the object that was acquired above (the imported
+                // image), not the plain OpenCL image.
+                err = clEnqueueReleaseExternalMemObjectsKHR(
+                    queue, 1, &imported_image, 0, nullptr, nullptr);
+                test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed");
+
+                // Read buffer and verify
+                std::vector<char> ocl_verify_data(verify_buffer_size);
+                err = clEnqueueReadBuffer(
+                    queue, ocl_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size,
+                    ocl_verify_data.data(), 0, nullptr, nullptr);
+                test_error(err,
"clEnqueueReadBuffer failed"); + + std::vector ahb_verify_data(verify_buffer_size); + err = clEnqueueReadBuffer( + queue, ahb_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size, + ahb_verify_data.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + for (unsigned row = 0; row < imageInfo.height; row++) + { + for (unsigned col = 0; col < imageInfo.width; col++) + { + unsigned pixel_index = row * imageInfo.width + col; + switch (outputType) + { + case kFloat: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + log_error( + "At coord (%u, %u) expected " + "(%f,%f,%f,%f), got (%f,%f,%f,%f)", + col, row, cl_ptr[pixel_index].s0, + cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + cl_ptr[pixel_index].s3, + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + + return TEST_FAIL; + } + } + break; + case kUInt: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + log_error( + "At coord (%u, %u) expected " + "(%u,%u,%u,%u), got (%u,%u,%u,%u)", + col, row, cl_ptr[pixel_index].s0, + cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + cl_ptr[pixel_index].s3, + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + return TEST_FAIL; + } + } + break; + default: test_fail("Unknown output type"); + } + } + } + + 
AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_read_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has 
unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + const size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = hardware_buffer_desc.stride * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + const cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + std::vector out_image(srcBytes); + err = 
clEnqueueReadImage(queue, imported_image, CL_TRUE, origin, + region, imageInfo.rowPitch, 0, + out_image.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueCopyImage failed"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed"); + + const char *out_image_ptr = out_image.data(); + auto srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_image_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixel_size * where, + out_image_ptr + pixel_size * where, &imageInfo, + line, 1); + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += imageInfo.rowPitch; + out_image_ptr += imageInfo.rowPitch; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_copy_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t srcBytes = 
get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + cl_image_desc imageDesc = { 0 }; + imageDesc.image_type = imageInfo.type; + imageDesc.image_width = imageInfo.width; + imageDesc.image_height = imageInfo.height; + + clMemWrapper opencl_image = + clCreateImage(context, CL_MEM_READ_WRITE, imageInfo.format, + &imageDesc, nullptr, &err); + test_error(err, "Failed to create CL image"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + err = clEnqueueCopyImage(queue, imported_image, opencl_image, + origin, origin, region, 0, nullptr, + nullptr); + test_error(err, "Failed calling clEnqueueCopyImage"); + + ExplicitTypes outputType; + const char *readFormat; + + if (format.clImageFormat.image_channel_data_type + == CL_UNSIGNED_INT8) + { + readFormat = "ui"; + outputType = kUInt; + 
}
+                else
+                {
+                    readFormat = "f";
+                    outputType = kFloat;
+                }
+
+                // Four channels per pixel, each stored as outputType
+                size_t verify_buffer_size = imageInfo.width * imageInfo.height
+                    * get_explicit_type_size(outputType) * 4;
+
+                clMemWrapper ocl_pixel_buffer =
+                    clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                   verify_buffer_size, nullptr, &err);
+                test_error(err, "Failed to create ocl pixel buffer");
+
+                clMemWrapper ahb_pixel_buffer =
+                    clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                   verify_buffer_size, nullptr, &err);
+                test_error(err, "Failed to create ahb pixel buffer");
+
+                // Instantiate the kernel template. snprintf is bounded by the
+                // buffer size so the formatted source can never overrun it.
+                std::vector<char> programSrc(
+                    2 * strlen(diff_images_kernel_source));
+                const char *outputTypeName = get_explicit_type_name(outputType);
+
+                snprintf(programSrc.data(), programSrc.size(),
+                         diff_images_kernel_source,
+                         outputTypeName, /*read image format 1 */
+                         readFormat, /*read image return type 1 */
+                         readFormat /*read image return type 2 */
+                );
+                const char *ptr = programSrc.data();
+                clProgramWrapper program;
+                clKernelWrapper kernel;
+                err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                                  &ptr, "verify_image");
+                // Stop on a failed build instead of continuing with a null
+                // kernel.
+                test_error(err, "Failed to create verify_image kernel");
+
+                // set kernel args
+
+                err = clSetKernelArg(kernel, 0, sizeof(cl_mem),
+                                     &imported_image); /*imported image */
+                test_error(err, "clSetKernelArg failed");
+
+                err = clSetKernelArg(kernel, 1, sizeof(cl_mem),
+                                     &opencl_image); /*image made in opencl*/
+                test_error(err, "clSetKernelArg failed");
+
+                err = clSetKernelArg(kernel, 2, sizeof(cl_mem),
+                                     &ocl_pixel_buffer); /*verification buffer*/
+                test_error(err, "clSetKernelArg failed");
+
+                err = clSetKernelArg(kernel, 3, sizeof(cl_mem),
+                                     &ahb_pixel_buffer); /*verification buffer*/
+                test_error(err, "clSetKernelArg failed");
+
+                size_t global_work_size[] = { (imageInfo.width),
+                                              (imageInfo.height) };
+                err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr,
+                                             global_work_size, nullptr, 0,
+                                             nullptr, nullptr);
+                // Check the launch result before err is reused below;
+                // otherwise a failed launch would go unnoticed.
+                test_error(err, "clEnqueueNDRangeKernel failed");
+
+                err = clEnqueueReleaseExternalMemObjectsKHR(
+                    queue, 1, &imported_image, 0, nullptr, nullptr);
+                test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed");
+
+                
// Read buffer and verify + std::vector ocl_verify_data(verify_buffer_size); + err = clEnqueueReadBuffer( + queue, ocl_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size, + ocl_verify_data.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + std::vector ahb_verify_data(verify_buffer_size); + err = clEnqueueReadBuffer( + queue, ahb_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size, + ahb_verify_data.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + for (unsigned row = 0; row < imageInfo.height; row++) + { + for (unsigned col = 0; col < imageInfo.width; col++) + { + unsigned pixel_index = row * imageInfo.width + col; + switch (outputType) + { + case kFloat: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + printf("At %u\n", pixel_index); + printf("Expected %f,%f,%f,%f\n", + cl_ptr[pixel_index].s0, + cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + cl_ptr[pixel_index].s3); + printf("Got %f,%f,%f,%f\n", + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + + return TEST_FAIL; + } + } + break; + case kUInt: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + printf("At %u\n", pixel_index); + printf("Expected %u,%u,%u,%u\n", + cl_ptr[pixel_index].s0, + cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + 
cl_ptr[pixel_index].s3); + printf("Got %u,%u,%u,%u\n", + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + + return TEST_FAIL; + } + } + break; + default: test_fail("Unknown output type"); + } + } + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_copy_image_to_buffer) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + 
ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = hardware_buffer_desc.stride * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + clMemWrapper opencl_buffer = clCreateBuffer( + 
context, CL_MEM_READ_WRITE, srcBytes, nullptr, &err); + test_error(err, "Failed to create CL buffer"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + err = clEnqueueCopyImageToBuffer(queue, imported_image, + opencl_buffer, origin, region, + 0, 0, nullptr, nullptr); + test_error( + err, "Failed to copy imported AHB image to opencl buffer"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed"); + + std::vector out_buffer(srcBytes); + err = clEnqueueReadBuffer(queue, opencl_buffer, CL_TRUE, 0, + srcBytes, out_buffer.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + char *out_buffer_ptr = out_buffer.data(); + auto srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_buffer_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_buffer_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixel_size * where, + out_buffer_ptr + pixel_size * where, &imageInfo, + line, 1); + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += imageInfo.rowPitch; + out_buffer_ptr += scanlineSize; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} 
+ +REGISTER_TEST(test_enqueue_copy_buffer_to_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has 
unexpected height"); + + // Generate random data for opencl buffer + const size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * resolution.height + * pixelSize; // data is tightly packed in buffer + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + const size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + clMemWrapper opencl_buffer = clCreateBuffer( + context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, srcBytes, + srcData, &err); + test_error(err, "Failed to create CL buffer"); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_WRITE, nullptr, nullptr, + nullptr, &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjects failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + err = clEnqueueCopyBufferToImage(queue, opencl_buffer, + imported_image, 0, origin, + region, 0, nullptr, nullptr); + test_error( + err, "Failed to copy opencl buffer to imported AHB image"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjects failed"); + + clFinish(queue); + + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + void 
*hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + auto out_image_ptr = static_cast(hardware_buffer_data); + auto srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_image_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixelSize * where, + out_image_ptr + pixelSize * where, &imageInfo, + line, 1); + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, + nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with " + "code %d\n", + ahb_result); + return TEST_FAIL; + } + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += + scanlineSize; // image data is tightly packed in buffer + out_image_ptr += hardware_buffer_desc.stride * pixelSize; + } + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_write_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this 
platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, 
nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + // Generate data to write to image + const size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * resolution.height + * pixelSize; // Data is tightly packed + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + const size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjects failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + err = clEnqueueWriteImage(queue, imported_image, CL_TRUE, + origin, region, 0, 0, srcData, 0, + nullptr, nullptr); + test_error(err, "Failed calling clEnqueueWriteImage"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clReleaseExternalMemObject failed"); + + clFinish(queue); + + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + auto out_image_ptr = static_cast(hardware_buffer_data); + auto srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); 
+ + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_image_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixel_size * where, + out_image_ptr + pixel_size * where, &imageInfo, + line, 1); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, + nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with " + "code %d\n", + ahb_result); + return TEST_FAIL; + } + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += scanlineSize; // Data is tightly packed + out_image_ptr += hardware_buffer_desc.stride * pixelSize; + } + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_fill_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + // Create image info struct + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = 
&format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * resolution.height + * pixelSize; // Data is tightly packed + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + auto verificationValue = static_cast(malloc(pixelSize)); + if (!verificationValue) + { + log_error( + "Unable to malloc %zu bytes for verificationValue", + pixelSize); + return TEST_FAIL; + } + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjects failed"); + + // Generate pixel color and fill image + switch (format.clImageFormat.image_channel_data_type) + { + case CL_HALF_FLOAT: + DetectFloatToHalfRoundingMode( + queue); // Intentional drop-through + case CL_UNORM_INT8: { + auto pattern_decimal = + static_cast(genrand_real1(seed)); + cl_float fillColor[4] = { pattern_decimal, + pattern_decimal, + pattern_decimal, + pattern_decimal }; + + err = clEnqueueFillImage(queue, imported_image, + fillColor, origin, region, 0, + nullptr, nullptr); + test_error(err, "Failed calling clEnqueueFillImage"); + + pack_image_pixel(fillColor, &format.clImageFormat, + verificationValue); + break; + } + case CL_UNSIGNED_INT16: { + const cl_uint pattern_whole = genrand_int32(seed); + cl_uint fillColor[4] = { pattern_whole, pattern_whole, + pattern_whole, pattern_whole }; + + err = clEnqueueFillImage(queue, imported_image, + fillColor, origin, region, 0, + nullptr, nullptr); + test_error(err, "Failed calling clEnqueueFillImage"); + + pack_image_pixel(fillColor, &format.clImageFormat, + verificationValue); + break; + } + default: + log_info("Unsupported image channel data type"); + continue; + } + + err = clEnqueueReleaseExternalMemObjectsKHR( 
+ queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjects failed"); + + clFinish(queue); + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + auto out_image_ptr = static_cast(hardware_buffer_data); + const size_t scanlineSize = imageInfo.width * pixelSize; + + + auto verificationLine = + static_cast(malloc(pixelSize * scanlineSize)); + if (!verificationLine) + { + free(verificationValue); + log_error("Unable to malloc %zu bytes for verificationLine", + pixelSize * scanlineSize); + return TEST_FAIL; + } + char *index = verificationLine; + for (size_t x = 0; x < imageInfo.width; x++) + { + memcpy(index, verificationValue, pixelSize); + index += pixelSize; + } + + free(verificationValue); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(verificationLine, out_image_ptr, scanlineSize) + != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, verificationLine, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, verificationLine + pixel_size * where, + out_image_ptr + pixel_size * where, &imageInfo, + line, 1); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, + nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with " + "code %d\n", + ahb_result); + free(verificationLine); + return TEST_FAIL; + } + free(verificationLine); + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + out_image_ptr += 
hardware_buffer_desc.stride * pixelSize; + } + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + free(verificationLine); + return TEST_FAIL; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + free(verificationLine); + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_blob) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + log_info("Testing %s\n", + ahardwareBufferFormatToString(static_cast( + aHardwareBufferDesc.format)) + .c_str()); + + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width * resolution.height; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + std::string usage_string = ahardwareBufferDecodeUsageFlagsToString( + static_cast( + aHardwareBufferDesc.usage)); + log_info("Unsupported format %s, usage flags %s\n", + ahardwareBufferFormatToString( + static_cast( + aHardwareBufferDesc.format)) + .c_str(), + usage_string.c_str()); + continue; + } + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = + 
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem buffer = clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, 0, nullptr, &err); + test_error(err, "Failed to create CL buffer from AHardwareBuffer"); + + test_error(clReleaseMemObject(buffer), "Failed to release buffer"); + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + + return TEST_PASS; +} diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp new file mode 100644 index 00000000..b689b903 --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp @@ -0,0 +1,246 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "harness/compat.h" +#include "harness/kernelHelpers.h" +#include "harness/imageHelpers.h" +#include "harness/errorHelpers.h" +#include +#include "debug_ahb.h" + +REGISTER_TEST(test_buffer_format_negative) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + aHardwareBufferDesc.width = 64; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + const std::string usage_string = + ahardwareBufferDecodeUsageFlagsToString( + static_cast( + aHardwareBufferDesc.usage)); + log_info( + "Unsupported format %s, usage flags %s\n", + ahardwareBufferFormatToString( + static_cast(aHardwareBufferDesc.format)) + .c_str(), + usage_string.c_str()); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer *aHardwareBuffer = nullptr; + const int ahb_result = + AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); + return TEST_FAIL; + } + log_info("Testing %s\n", + ahardwareBufferFormatToString(static_cast( + aHardwareBufferDesc.format)) + .c_str()); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem buffer = clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, 0, nullptr, &err); + test_assert_error(err == CL_INVALID_OPERATION, + "To create a buffer the aHardwareFormat must be " + "AHARDWAREBUFFER_FORMAT_BLOB"); + + if (buffer != nullptr) + { + test_error(clReleaseMemObject(buffer), "Failed to release buffer"); + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + return 
TEST_PASS; +} + +REGISTER_TEST(test_buffer_size_negative) +{ + cl_int err = CL_SUCCESS; + constexpr size_t buffer_size = 64; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + aHardwareBufferDesc.width = buffer_size; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + const std::string usage_string = + ahardwareBufferDecodeUsageFlagsToString( + static_cast( + aHardwareBufferDesc.usage)); + log_info( + "Unsupported format %s, usage flags %s\n", + ahardwareBufferFormatToString( + static_cast(aHardwareBufferDesc.format)) + .c_str(), + usage_string.c_str()); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer *aHardwareBuffer = nullptr; + const int ahb_result = + AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); + return TEST_FAIL; + } + log_info("Testing %s\n", + ahardwareBufferFormatToString(static_cast( + aHardwareBufferDesc.format)) + .c_str()); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem buffer = clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err); + test_assert_error(err == 
CL_INVALID_BUFFER_SIZE, + "Wrong error value returned"); + + if (buffer != nullptr) + { + test_error(clReleaseMemObject(buffer), "Failed to release buffer"); + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + return TEST_PASS; +} + +REGISTER_TEST(test_images_negative) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + aHardwareBufferDesc.usage = static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER); + aHardwareBufferDesc.width = 64; + aHardwareBufferDesc.height = 64; + aHardwareBufferDesc.layers = 1; + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = + AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); + return TEST_FAIL; + } + + const cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + constexpr cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 }; + cl_mem image = + clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE, + &image_format, nullptr, nullptr, &err); + test_assert_error(err == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Wrong error value returned"); + if (image != nullptr) + { + test_error(clReleaseMemObject(image), "Failed to release image"); + } 
+ + constexpr cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, 64, 64 }; + image = clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE, + nullptr, &image_desc, nullptr, &err); + test_assert_error(err == CL_INVALID_IMAGE_DESCRIPTOR, + "Wrong error value returned"); + if (image != nullptr) + { + test_error(clReleaseMemObject(image), "Failed to release image"); + } + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + return TEST_PASS; +} \ No newline at end of file From e2580bded220c6ac9786a1008b479aa96d32d359 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 5 Aug 2025 23:40:11 +0200 Subject: [PATCH 29/61] Use CL_KERNEL_WORK_GROUP_SIZE more often (#2435) Drivers _may_ choose to advertise values for `CL_DEVICE_MAX_WORK_GROUP_SIZE` or `CL_DEVICE_MAX_WORK_ITEM_SIZES` that kernels without a `reqd_work_group_size` are not able to be launched with. The CTS should therefore make sure that the local_size passed to `clEnqueueNDRangeKernel` does not exceed `CL_KERNEL_WORK_GROUP_SIZE` This fixes it up in two places I've noticed this not happening. 
--- .../api/test_sub_group_dispatch.cpp | 6 +++- .../profiling/execute_multipass.cpp | 30 +++++++++---------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp index 70a78f4a..c1b88be5 100644 --- a/test_conformance/api/test_sub_group_dispatch.cpp +++ b/test_conformance/api/test_sub_group_dispatch.cpp @@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1)) nullptr); test_error(error, "clGetDeviceInfo failed"); - max_local = max_work_item_sizes[0]; + error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(max_local), &max_local, nullptr); + test_error(error, "clGetKernelWorkGroupInfo failed"); + + max_local = std::min(max_local, max_work_item_sizes[0]); error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); diff --git a/test_conformance/profiling/execute_multipass.cpp b/test_conformance/profiling/execute_multipass.cpp index 7d654ca5..7a711e6f 100644 --- a/test_conformance/profiling/execute_multipass.cpp +++ b/test_conformance/profiling/execute_multipass.cpp @@ -107,21 +107,6 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue threads[1] = h; threads[2] = d; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, - 3 * sizeof(size_t), (size_t *)localThreads, NULL); - test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed"); - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), - &maxWorkgroupSize, NULL); - test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); - - localThreads[0] = - std::min({ localThreads[0], threads[0], maxWorkgroupSize }); - localThreads[1] = std::min( - { localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] }); - localThreads[2] = - std::min({ localThreads[2], threads[2], - maxWorkgroupSize / (localThreads[0] * localThreads[1]) }); - 
clSamplerWrapper sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); test_error(err, "clCreateSampler failed"); @@ -143,6 +128,21 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue &read3d_kernel_code, "read3d"); test_error(err, "create_single_kernel_helper failed"); + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, + 3 * sizeof(size_t), (size_t *)localThreads, NULL); + test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed"); + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(size_t), &maxWorkgroupSize, NULL); + test_error(err, "clGetDeviceInfo(CL_KERNEL_WORK_GROUP_SIZE) failed\n"); + + localThreads[0] = + std::min({ localThreads[0], threads[0], maxWorkgroupSize }); + localThreads[1] = std::min( + { localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] }); + localThreads[2] = + std::min({ localThreads[2], threads[2], + maxWorkgroupSize / (localThreads[0] * localThreads[1]) }); + // create kernel args object and set arg values. // set the args values err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]); From ac63117231d1691f9aecd4aa9b7ce29a4be4350a Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Thu, 7 Aug 2025 15:12:27 +0100 Subject: [PATCH 30/61] Fix compiler warnings in test_vulkan (#2474) Use the correct print format specifiers for `uint64_t` and fix loop index type to address sign warning. 
Signed-off-by: Ahmed Hesham --- test_conformance/vulkan/test_vulkan_api_consistency.cpp | 3 ++- .../vulkan/test_vulkan_api_consistency_for_1dimages.cpp | 3 ++- .../vulkan/test_vulkan_api_consistency_for_3dimages.cpp | 3 ++- test_conformance/vulkan/test_vulkan_interop_buffer.cpp | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index 5305e48c..bd19987b 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "harness/testHarness.h" #include "harness/typeWrappers.h" #include "harness/deviceInfo.h" @@ -236,7 +237,7 @@ struct ConsistencyExternalImageTest : public VulkanTestBase log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); - log_info("Image size : %ld\n", vkImage2D.getSize()); + log_info("Image size : %" PRIu64 "\n", vkImage2D.getSize()); VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(*vkDevice, vkImage2D, memoryTypeList[0], diff --git a/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp index ef7ae348..c979fb19 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "harness/testHarness.h" #include "harness/typeWrappers.h" #include "harness/deviceInfo.h" @@ -94,7 +95,7 @@ struct ConsistencyExternalImage1DTest : public VulkanTestBase log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); - log_info("Image size : %lu\n", vkImage1D.getSize()); + 
log_info("Image size : %" PRIu64 "\n", vkImage1D.getSize()); VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(*vkDevice, vkImage1D, memoryTypeList[0], diff --git a/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp index 3d697b0e..a21eea7b 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "harness/testHarness.h" #include "harness/typeWrappers.h" #include "harness/deviceInfo.h" @@ -96,7 +97,7 @@ struct ConsistencyExternalImage3DTest : public VulkanTestBase log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); - log_info("Image size : %lu\n", vkImage3D.getSize()); + log_info("Image size : %" PRIu64 "\n", vkImage3D.getSize()); VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(*vkDevice, vkImage3D, memoryTypeList[0], diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index f50c457a..e39641f1 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -1226,7 +1226,7 @@ int run_test_with_multi_import_diff_ctx( vkCommandBuffer.dispatch(512, 1, 1); vkCommandBuffer.end(); - for (uint32_t i = 0; i < numImports; i++) + for (int i = 0; i < numImports; i++) { update_buffer_kernel1[i] = (numBuffers == 1) ? 
kernel1[0] From b8f9581e89569225539614aeece66e19c2076035 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Thu, 7 Aug 2025 20:48:34 +0100 Subject: [PATCH 31/61] Update the GitHub workflow for Android (#2476) Use the latest stable LTS version of the NDK (r28c) and add `ANDROID_PLATFORM` to the CMake configuration options, set to 29, so that Android Hardware Buffer tests can be compiled. Signed-off-by: Ahmed Hesham --- .github/workflows/presubmit.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 66b03df2..2debc6de 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -69,12 +69,13 @@ jobs: - name: Install Android NDK if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }} run: | - wget https://dl.google.com/android/repository/android-ndk-r27c-linux.zip -O android-ndk.zip + wget https://dl.google.com/android/repository/android-ndk-r28c-linux.zip -O android-ndk.zip unzip android-ndk.zip -d $HOME - export ANDROID_NDK=$HOME/android-ndk-r27c + export ANDROID_NDK=$HOME/android-ndk-r28c echo "ANDROID_NDK=$ANDROID_NDK" >> $GITHUB_ENV export ANDROID_ARCH_ABI=${{ matrix.android_arch_abi }} echo "ANDROID_ARCH_ABI=$ANDROID_ARCH_ABI" >> $GITHUB_ENV + echo "ANDROID_PLATFORM=29" >> $GITHUB_ENV - name: Prepare CMake Toolchain file shell: bash run: | @@ -107,7 +108,7 @@ jobs: if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }} shell: bash run: | - echo "CMAKE_CONFIG_ARGS_ANDROID=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI}" >> $GITHUB_ENV + echo "CMAKE_CONFIG_ARGS_ANDROID=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI} -DANDROID_PLATFORM=${ANDROID_PLATFORM}" >> $GITHUB_ENV - name: Fetch and build OpenCL ICD Loader shell: bash run: | @@ -119,7 +120,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ 
-DOPENCL_ICD_LOADER_HEADERS_DIR='${{ github.workspace }}'/OpenCL-Headers/ \ - "${CMAKE_CONFIG_ARGS_ANDROID}" + ${CMAKE_CONFIG_ARGS_ANDROID} cmake --build . --parallel - name: Fetch Vulkan Headers shell: bash @@ -168,7 +169,7 @@ jobs: -DVULKAN_IS_SUPPORTED=ON \ -DVULKAN_INCLUDE_DIR='${{ github.workspace }}'/Vulkan-Headers/include/ \ -DVULKAN_LIB_DIR='${{ github.workspace }}'/Vulkan-Loader/build/loader/ \ - "${CMAKE_CONFIG_ARGS_ANDROID}" + ${CMAKE_CONFIG_ARGS_ANDROID} cmake --build . --parallel formatcheck: name: Check code format From 3fd572dec4eef532dec75ccd499ac17f9755d030 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Thu, 7 Aug 2025 20:49:06 +0100 Subject: [PATCH 32/61] Fix compilation errors cl_khr_external_memory_ahb (#2475) Update the test to use `CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR` instead of `CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR` to match the headers. Handle missing format in switch statement. Signed-off-by: Ahmed Hesham --- .../cl_khr_external_memory_ahb/debug_ahb.cpp | 5 ++++- .../cl_khr_external_memory_ahb/test_ahb.cpp | 18 +++++++++--------- .../test_ahb_negative.cpp | 8 ++++---- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp index 3964c0f6..e0ca6615 100644 --- a/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp @@ -173,6 +173,9 @@ std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format) case AHARDWAREBUFFER_FORMAT_YCbCr_P010: result = "AHARDWAREBUFFER_FORMAT_YCbCr_P010"; break; + case AHARDWAREBUFFER_FORMAT_YCbCr_P210: + result = "AHARDWAREBUFFER_FORMAT_YCbCr_P210"; + break; case AHARDWAREBUFFER_FORMAT_R8_UNORM: result = "AHARDWAREBUFFER_FORMAT_R8_UNORM"; break; @@ -187,4 +190,4 @@ std::string 
ahardwareBufferFormatToString(AHardwareBuffer_Format format) break; } return result; -} \ No newline at end of file +} diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp index ada80027..f0747d0e 100644 --- a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp @@ -145,7 +145,7 @@ REGISTER_TEST(test_images) } const cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -300,7 +300,7 @@ REGISTER_TEST(test_images_read) } cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -602,7 +602,7 @@ REGISTER_TEST(test_enqueue_read_image) } const cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -786,7 +786,7 @@ REGISTER_TEST(test_enqueue_copy_image) } cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -1095,7 +1095,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer) } cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -1274,7 +1274,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image) test_error(err, "Failed to create CL buffer"); cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -1452,7 +1452,7 @@ REGISTER_TEST(test_enqueue_write_image) cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + 
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -1649,7 +1649,7 @@ REGISTER_TEST(test_enqueue_fill_image) "AHB has unexpected height"); cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -1894,7 +1894,7 @@ REGISTER_TEST(test_blob) } cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp index b689b903..ca010b58 100644 --- a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp @@ -77,7 +77,7 @@ REGISTER_TEST(test_buffer_format_negative) .c_str()); cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -155,7 +155,7 @@ REGISTER_TEST(test_buffer_size_negative) .c_str()); cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -215,7 +215,7 @@ REGISTER_TEST(test_images_negative) } const cl_mem_properties props[] = { - CL_EXTERNAL_MEMORY_HANDLE_AHB_KHR, + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, reinterpret_cast(aHardwareBuffer), 0 }; @@ -243,4 +243,4 @@ REGISTER_TEST(test_images_negative) aHardwareBuffer = nullptr; return TEST_PASS; -} \ No newline at end of file +} From 2e0f8036990e209f216a1def553cfce1b8a874dd Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 8 Aug 2025 23:06:15 +0200 Subject: [PATCH 33/61] Added test to verify negative result of clSetKernelArg with CL_INVALID_ARG_SIZE and 
local qualified argument (#2449) Related to #2282, according to work plan from [here](https://github.com/KhronosGroup/OpenCL-CTS/issues/2282#issuecomment-3069182773) --- test_conformance/api/test_kernels.cpp | 32 +++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index a8d02558..fc619bf8 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -87,6 +87,14 @@ const char *sample_two_kernel_program[] = { "\n" "}\n" }; +const char *sample_local_size_test_kernel = R"( + __kernel void local_size_test(__local int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + } +)"; + const char *sample_read_only_image_test_kernel = R"( __kernel void read_only_image_test(__write_only image2d_t img, __global uint4 *src) { @@ -718,6 +726,30 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) return TEST_PASS; } +REGISTER_TEST(negative_invalid_arg_size_local) +{ + cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper local_arg_kernel; + + // Setup the test + error = create_single_kernel_helper( + context, &program, nullptr, 1, &sample_local_size_test_kernel, nullptr); + test_error(error, "Unable to build test program"); + + local_arg_kernel = clCreateKernel(program, "local_size_test", &error); + test_error(error, "Unable to get local_size_test kernel for built program"); + + // Run the test + error = clSetKernelArg(local_arg_kernel, 0, 0, nullptr); + test_failure_error_ret( + error, CL_INVALID_ARG_SIZE, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when 0 is " + "passed to a local qualifier kernel argument", + TEST_FAIL); + return TEST_PASS; +} + REGISTER_TEST(negative_set_read_write_image_arg) { cl_int error = CL_SUCCESS; From 4115d04ae0e8cc5d7c1c07265ba620f3b13a74c3 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 12 Aug 2025 17:40:13 +0200 Subject: 
[PATCH 34/61] Added test to verify negative result of clSetKernelArg with CL_INVALID_ARG_INDEX (#2458) Related to #2282, according to work plan from [here](https://github.com/KhronosGroup/OpenCL-CTS/issues/2282#issuecomment-3069182773) --- test_conformance/api/test_kernels.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index fc619bf8..1446ade8 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -726,6 +726,31 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) return TEST_PASS; } +REGISTER_TEST(negative_invalid_arg_index) +{ + cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper kernel; + + // Setup the test + error = create_single_kernel_helper(context, &program, nullptr, 1, + sample_single_test_kernel, nullptr); + test_error(error, "Unable to build test program"); + + kernel = clCreateKernel(program, "sample_test", &error); + test_error(error, "Unable to get sample_test kernel for built program"); + + // Run the test - index 2 is out of range - expected CL_INVALID_ARG_INDEX + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), nullptr); + test_failure_error_ret( + error, CL_INVALID_ARG_INDEX, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_INDEX when " + "arg_index is not a valid argument index", + TEST_FAIL); + + return TEST_PASS; +} + REGISTER_TEST(negative_invalid_arg_size_local) { cl_int error = CL_SUCCESS; From aef863afa212091cee653e2f9d4fabf826c7f227 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 12 Aug 2025 09:46:23 -0600 Subject: [PATCH 35/61] Support building for Windows on 64-bit Arm (#2355) Support to build for Windows on Arm. 
--- .github/workflows/presubmit.yml | 2 +- test_common/harness/ThreadPool.cpp | 13 ++++ test_common/harness/conversions.cpp | 8 +-- test_common/harness/fpcontrol.h | 61 ++++++++++++------- test_common/harness/msvc9.c | 2 + test_common/harness/rounding_mode.cpp | 22 +++++-- .../conversions/basic_test_conversions.cpp | 6 +- .../conversions/conversions_data_info.h | 6 +- .../math_brute_force/reference_math.cpp | 15 ++--- 9 files changed, 86 insertions(+), 49 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 2debc6de..bfeb322b 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -10,7 +10,7 @@ jobs: matrix: build-type: [Release] gl: [0] - os: [ubuntu-22.04, macos-latest, windows-latest] + os: [ubuntu-22.04, macos-latest, windows-latest, windows-11-arm] include: - os: ubuntu-22.04 gl: 1 diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index fb1291d6..fab778c8 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -436,7 +436,14 @@ void *ThreadPool_WorkerFunc(void *p) // drop run count to 0 gRunCount = 0; +#if defined(_M_IX86) || defined(_M_X64) _mm_mfence(); +#elif defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISHST); +#else +#error Architecture needs an implementation +#endif + #else if (pthread_mutex_lock(&gAtomicLock)) log_error( @@ -703,7 +710,13 @@ void ThreadPool_Exit(void) // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins __sync_synchronize(); #elif defined(_MSC_VER) +#if defined(_M_IX86) || defined(_M_X64) _mm_mfence(); +#elif defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISHST); +#else +#error Architecture needs an implementation +#endif #else #warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed #endif diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index 
18c2869d..e0e326ff 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -23,10 +23,10 @@ #include -#if defined(__SSE__) || defined(_MSC_VER) +#if defined(__SSE__) || _M_IX86_FP == 1 #include #endif -#if defined(__SSE2__) || defined(_MSC_VER) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) #include #endif @@ -110,7 +110,7 @@ static long lrintf_clamped(float f) volatile float x = f; float magicVal = magic[f < 0]; -#if defined(__SSE__) || defined(_WIN32) +#if defined(__SSE__) || _M_IX86_FP == 1 // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128 v = _mm_set_ss(x); @@ -150,7 +150,7 @@ static long lrint_clamped(double f) { volatile double x = f; double magicVal = magic[f < 0]; -#if defined(__SSE2__) || (defined(_MSC_VER)) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128d v = _mm_set_sd(x); diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 12aba0a9..afb0f5a3 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -37,36 +37,44 @@ typedef int FPU_mode_type; #else typedef int64_t FPU_mode_type; #endif -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) #include +#elif defined(_M_ARM64) +#include #elif defined(__PPC__) #include extern __thread fpu_control_t fpu_control; #elif defined(__mips__) #include "mips/m32c1.h" #endif + // Set the reference hardware floating point unit to FTZ mode -inline void ForceFTZ(FPU_mode_type *mode) +inline void ForceFTZ(FPU_mode_type *oldMode) { -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr(*mode | 0x8040); +#if defined(__i386__) || 
defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) + *oldMode = _mm_getcsr(); + _mm_setcsr(*oldMode | 0x8040); #elif defined(__PPC__) - *mode = fpu_control; + *oldMode = fpu_control; fpu_control |= _FPU_MASK_NI; #elif defined(__arm__) unsigned fpscr; __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24))); // Add 64 bit support -#elif defined(__aarch64__) +#elif defined(__aarch64__) // Clang uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); +#elif defined(_M_ARM64) // Visual Studio + uint64_t fpscr; + fpscr = _ReadStatusReg(ARM64_FPSR); + *oldMode = fpscr; + _WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24)); #elif defined(__mips__) fpa_bissr(FPA_CSR_FS); #else @@ -75,26 +83,31 @@ inline void ForceFTZ(FPU_mode_type *mode) } // Disable the denorm flush to zero -inline void DisableFTZ(FPU_mode_type *mode) +inline void DisableFTZ(FPU_mode_type *oldMode) { -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr(*mode & ~0x8040); +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) + *oldMode = _mm_getcsr(); + _mm_setcsr(*oldMode & ~0x8040); #elif defined(__PPC__) *mode = fpu_control; fpu_control &= ~_FPU_MASK_NI; #elif defined(__arm__) unsigned fpscr; __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24))); // Add 64 bit support -#elif defined(__aarch64__) +#elif defined(__aarch64__) // Clang uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); +#elif defined(_M_ARM64) // Visual Studio + uint64_t fpscr; + 
fpscr = _ReadStatusReg(ARM64_FPSR); + *oldMode = fpscr; + _WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24)); #elif defined(__mips__) fpa_bicsr(FPA_CSR_FS); #else @@ -105,16 +118,18 @@ inline void DisableFTZ(FPU_mode_type *mode) // Restore the reference hardware to floating point state indicated by *mode inline void RestoreFPState(FPU_mode_type *mode) { -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) _mm_setcsr(*mode); #elif defined(__PPC__) fpu_control = *mode; #elif defined(__arm__) __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode)); // Add 64 bit support -#elif defined(__aarch64__) +#elif defined(__aarch64__) // Clang __asm__ volatile("msr fpcr, %0" ::"r"(*mode)); +#elif defined(_M_ARM64) // Visual Studio + _WriteStatusReg(ARM64_FPCR, *mode); #elif defined(__mips__) // Mips runs by default with DAZ=1 FTZ=1 #else @@ -125,4 +140,4 @@ inline void RestoreFPState(FPU_mode_type *mode) #error ForceFTZ and RestoreFPState need implentations #endif -#endif +#endif \ No newline at end of file diff --git a/test_common/harness/msvc9.c b/test_common/harness/msvc9.c index ef70035f..c0042928 100644 --- a/test_common/harness/msvc9.c +++ b/test_common/harness/msvc9.c @@ -786,7 +786,9 @@ int __builtin_clz(unsigned int pattern) #endif // !__has_builtin(__builtin_clz) #include +#if !defined(_M_ARM64) #include +#endif int usleep(int usec) { diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index b2e443b7..5aeb86f1 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -193,7 +193,8 @@ RoundingMode get_round(void) // basic_test_conversions.c in which case, these function are at // liberty to do nothing. 
// -#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) #include #elif defined(__PPC__) #include @@ -203,18 +204,24 @@ RoundingMode get_round(void) void *FlushToZero(void) { #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) union { unsigned int i; void *p; } u = { _mm_getcsr() }; _mm_setcsr(u.i | 0x8040); return u.p; -#elif defined(__arm__) || defined(__aarch64__) +#elif defined(__arm__) || defined(__aarch64__) // Clang int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr | FPSCR_FZ); return NULL; +#elif defined(_M_ARM64) // Visual Studio + uint64_t fpscr; + fpscr = _ReadStatusReg(ARM64_FPSR); + _WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24)); + return NULL; #elif defined(__PPC__) fpu_control_t flags = 0; _FPU_GETCW(flags); @@ -237,16 +244,21 @@ void *FlushToZero(void) void UnFlushToZero(void *p) { #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) union { void *p; unsigned int i; } u = { p }; _mm_setcsr(u.i); -#elif defined(__arm__) || defined(__aarch64__) +#elif defined(__arm__) || defined(__aarch64__) // Clang int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr & ~FPSCR_FZ); +#elif defined(_M_ARM64) // Visual Studio + uint64_t fpscr; + fpscr = _ReadStatusReg(ARM64_FPSR); + _WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24)); #elif defined(__PPC__) fpu_control_t flags = 0; _FPU_GETCW(flags); diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index d4f6d366..4692c4b4 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ 
b/test_conformance/conversions/basic_test_conversions.cpp @@ -53,17 +53,17 @@ #include "basic_test_conversions.h" -#if defined(_WIN32) +#if defined(_M_IX86) || defined(_M_X64) #include #include -#else // !_WIN32 +#else #if defined(__SSE__) #include #endif #if defined(__SSE2__) #include #endif -#endif // _WIN32 +#endif cl_context gContext = NULL; cl_command_queue gQueue = NULL; diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index 9d2cbc60..46eb9c23 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -343,7 +343,7 @@ float DataInfoSpec::round_to_int(float f) volatile float x = f; float magicVal = magic[f < 0]; -#if defined(__SSE__) +#if defined(__SSE__) || _M_IX86_FP == 1 // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128 v = _mm_set_ss(x); @@ -376,7 +376,7 @@ DataInfoSpec::round_to_int_and_clamp(double f) { volatile double x = f; double magicVal = magic[f < 0]; -#if defined(__SSE2__) || defined(_MSC_VER) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128d v = _mm_set_sd(x); @@ -479,7 +479,7 @@ void DataInfoSpec::conv(OutType *out, InType *in) { if (std::is_same::value) { -#if defined(_MSC_VER) +#if defined(_M_IX86) || defined(_M_X64) double result; if (std::is_same::value) diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 45dd6526..a66e6f7e 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -25,12 +25,10 @@ #include "utility.h" -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 #include #endif -#if defined(__SSE2__) \ - || (defined(_MSC_VER) && 
(defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) #include #endif @@ -855,8 +853,7 @@ double reference_add(double x, double y) volatile float a = (float)x; volatile float b = (float)y; -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 // defeat x87 __m128 va = _mm_set_ss((float)a); __m128 vb = _mm_set_ss((float)b); @@ -953,8 +950,7 @@ double reference_subtract(double x, double y) { volatile float a = (float)x; volatile float b = (float)y; -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 // defeat x87 __m128 va = _mm_set_ss((float)a); __m128 vb = _mm_set_ss((float)b); @@ -970,8 +966,7 @@ double reference_multiply(double x, double y) { volatile float a = (float)x; volatile float b = (float)y; -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 // defeat x87 __m128 va = _mm_set_ss((float)a); __m128 vb = _mm_set_ss((float)b); From 86634c07f923f63fa557715bb1d86aa6ec69c571 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Tue, 12 Aug 2025 17:46:59 +0200 Subject: [PATCH 36/61] fix test_api min_max_image_buffer_size (#2342) print a `log_info` and use the minimum value (`1`) for `pixelBytes` instead of printing an error and returning with an error value. 
It allows device exposing a big CL_DEVICE_IMAGE_MAX_BUFFER_SIZE (more than CL_DEVICE_MAX_MEM_ALLOC_SIZE/2) to pass test_api min_max_image_buffer_size Fix #2245 --- test_conformance/api/test_api_min_max.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index cd0934d0..29677623 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1088,10 +1088,12 @@ REGISTER_TEST(min_max_image_buffer_size) pixelBytes = maxAllocSize / maxDimensionPixels; if (pixelBytes == 0) { - log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than " - "CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image " - "of maximum size!\n"); - return -1; + log_info( + "Note, the value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is %zu pixels, " + "therefore the size of the allocated image may be larger than the " + "scaled CL_DEVICE_MAX_MEM_ALLOC_SIZE of %" PRIu64 " bytes.\n", + maxDimensionPixels, maxAllocSize); + pixelBytes = 1; } error = -1; From 555b7cd38302383efe768074eb6026f4f4af9869 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 13 Aug 2025 00:11:10 -0700 Subject: [PATCH 37/61] tests for cl_khr_spirv_queries (#2409) See: https://github.com/KhronosGroup/OpenCL-Docs/pull/1385 --- .github/workflows/presubmit.yml | 5 + CMakeLists.txt | 7 + README.md | 4 + test_conformance/api/CMakeLists.txt | 16 + .../api/generate_spirv_capability_deps.py | 102 +++ test_conformance/api/test_spirv_queries.cpp | 767 ++++++++++++++++++ 6 files changed, 901 insertions(+) create mode 100644 test_conformance/api/generate_spirv_capability_deps.py create mode 100644 test_conformance/api/test_spirv_queries.cpp diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index bfeb322b..594dc6f5 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -61,6 +61,10 @@ jobs: cd OpenCL-Headers ln -s CL 
OpenCL # For OSX builds cd .. + - name: Fetch SPIR-V Headers + shell: bash + run: | + git clone https://github.com/KhronosGroup/SPIRV-Headers.git - name: Install Vulkan SDK uses: humbletim/install-vulkan-sdk@main with: @@ -160,6 +164,7 @@ jobs: -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ -DCMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" \ -DCL_INCLUDE_DIR='${{ github.workspace }}'/OpenCL-Headers \ + -DSPIRV_INCLUDE_DIR='${{ github.workspace }}'/SPIRV-Headers \ -DCL_LIB_DIR='${{ github.workspace }}'/OpenCL-ICD-Loader/build \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \ diff --git a/CMakeLists.txt b/CMakeLists.txt index f34ade8e..90c343fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,12 @@ else(CL_INCLUDE_DIR AND CL_LIB_DIR) message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR") endif(CL_INCLUDE_DIR AND CL_LIB_DIR) +# SPIRV_INCLUDE_DIR - path to dir with SPIR-V headers +if(NOT SPIRV_INCLUDE_DIR) + message(STATUS "SPIR-V headers haven't been found!") + message(FATAL_ERROR "Pass -DSPIRV_INCLUDE_DIR") +endif(NOT SPIRV_INCLUDE_DIR) + # CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR) link_directories(${CLConform_GL_LIBRARIES_DIR}) @@ -195,6 +201,7 @@ if(APPLE) endif(APPLE) include_directories(SYSTEM ${CL_INCLUDE_DIR}) +include_directories(SYSTEM ${SPIRV_INCLUDE_DIR}/include) include_directories(${CLConform_SOURCE_DIR}/test_common/harness ${CLConform_SOURCE_DIR}/test_common/gles ${CLConform_SOURCE_DIR}/test_common/gl diff --git a/README.md b/README.md index 0cc09b1b..77e4d9a9 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ Compiling the CTS requires the following CMake configuration options to be set: * `CL_INCLUDE_DIR` Points to the unified [OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers). 
+* `SPIRV_INCLUDE_DIR` Points to the unified + [SPIRV-Headers](https://github.com/KhronosGroup/SPIRV-Headers). * `CL_LIB_DIR` Directory containing the OpenCL library to build against. * `SPIRV_TOOLS_DIR` Directory containing the `spirv-as` and `spirv-val` binaries to be used in the CTS build process. Alternatively, the location to these binaries @@ -31,6 +33,7 @@ a build, and compile. ```sh git clone https://github.com/KhronosGroup/OpenCL-CTS.git git clone https://github.com/KhronosGroup/OpenCL-Headers.git +git clone https://github.com/KhronosGroup/SPIRV-Headers.git git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git git clone https://github.com/KhronosGroup/SPIRV-Tools.git git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers @@ -50,6 +53,7 @@ cmake --build SPIRV-Tools/build --config Release mkdir OpenCL-CTS/build cmake -S OpenCL-CTS -B OpenCL-CTS/build \ -DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \ + -DSPIRV_INCLUDE_DIR=$PWD/SPIRV-Headers \ -DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \ -DSPIRV_TOOLS_DIR=$PWD/SPIRV-Tools/build/tools/ \ -DOPENCL_LIBRARIES=OpenCL diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt index f2bfac35..b781e49b 100644 --- a/test_conformance/api/CMakeLists.txt +++ b/test_conformance/api/CMakeLists.txt @@ -1,5 +1,7 @@ set(MODULE_NAME API) +find_package(Python3 COMPONENTS Interpreter QUIET) + set(${MODULE_NAME}_SOURCES main.cpp negative_platform.cpp @@ -40,6 +42,20 @@ set(${MODULE_NAME}_SOURCES test_pipe_properties_queries.cpp test_wg_suggested_local_work_size.cpp test_device_command_queue.cpp + test_spirv_queries.cpp + ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def ) +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def + COMMENT "Generating spirv_capability_deps.def..." 
+ COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_spirv_capability_deps.py + --grammar "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json" + --output "${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def" + DEPENDS generate_spirv_capability_deps.py "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json" + USES_TERMINAL + VERBATIM) + include(../CMakeCommon.txt) + +target_include_directories(${${MODULE_NAME}_OUT} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/test_conformance/api/generate_spirv_capability_deps.py b/test_conformance/api/generate_spirv_capability_deps.py new file mode 100644 index 00000000..ef59b6e8 --- /dev/null +++ b/test_conformance/api/generate_spirv_capability_deps.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +##################################################################### +# Copyright (c) 2025 The Khronos Group Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +##################################################################### + +""" +Generates a file describing the SPIR-V extension dependencies or SPIR-V version +dependencies for a SPIR-V capability. This can be used to ensure that if support +for a SPIR-V capability is reported, the necessary SPIR-V extensions or SPIR-V +version is also supported. +""" + +import argparse +import json + +header_text = """\ +// +// Copyright (c) 2025 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file is generated from the SPIR-V JSON grammar file. +// Please do not edit it directly! +""" + +def main(): + parser = argparse.ArgumentParser(description='Generate SPIR-V extension and version dependencies for SPIR-V capabilities') + + parser.add_argument('--grammar', metavar='', + type=str, required=True, + help='input JSON grammar file') + parser.add_argument('--output', metavar='', + type=str, required=False, + help='output file path (default: stdout)') + args = parser.parse_args() + + dependencies = {} + capabilities = [] + with open(args.grammar) as json_file: + grammar_json = json.loads(json_file.read()) + for operand_kind in grammar_json['operand_kinds']: + if operand_kind['kind'] == 'Capability': + for cap in operand_kind['enumerants']: + capname = cap['enumerant'] + capabilities.append(capname) + dependencies[capname] = {} + dependencies[capname]['extensions'] = cap['extensions'] if 'extensions' in cap else [] + dependencies[capname]['version'] = ("SPIR-V_" + cap['version']) if 'version' in cap and cap['version'] != 'None' else "" + + capabilities.sort() + + output = [] + output.append(header_text) + output.append("// clang-format off") + if False: + for cap in capabilities: + deps = dependencies[cap] + extensions_str = ', '.join(f'"{ext}"' for ext in deps['extensions']) + + output.append('SPIRV_CAPABILITY_DEPENDENCIES( {}, {{{}}}, "{}" )'.format( + cap, extensions_str, 
deps['version'])) + else: + for cap in capabilities: + deps = dependencies[cap] + if deps['version'] != "": + output.append('SPIRV_CAPABILITY_VERSION_DEPENDENCY( {}, "{}" )'.format(cap, deps['version'])) + for ext in deps['extensions']: + output.append('SPIRV_CAPABILITY_EXTENSION_DEPENDENCY( {}, "{}" )'.format(cap, ext)) + output.append("// clang-format on") + + if args.output: + with open(args.output, 'w') as output_file: + output_file.write('\n'.join(output)) + else: + print('\n'.join(output)) + +if __name__ == '__main__': + main() diff --git a/test_conformance/api/test_spirv_queries.cpp b/test_conformance/api/test_spirv_queries.cpp new file mode 100644 index 00000000..720f73b1 --- /dev/null +++ b/test_conformance/api/test_spirv_queries.cpp @@ -0,0 +1,767 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include +#include +#include + +#define SPV_ENABLE_UTILITY_CODE +#include + +static bool is_spirv_version_supported(cl_device_id deviceID, + const std::string& version) +{ + std::string ilVersions = get_device_il_version_string(deviceID); + return ilVersions.find(version) != std::string::npos; +} + +static int doQueries(cl_device_id device, + std::vector& extendedInstructionSets, + std::vector& extensions, + std::vector& capabilities) +{ + cl_int error = CL_SUCCESS; + + size_t size = 0; + error = + clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR, + 0, nullptr, &size); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR size\n"); + + extendedInstructionSets.resize(size / sizeof(const char*)); + error = + clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR, + size, extendedInstructionSets.data(), nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR\n"); + + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, 0, nullptr, + &size); + test_error( + error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR size\n"); + + extensions.resize(size / sizeof(const char*)); + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, size, + extensions.data(), nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR\n"); + + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, 0, + nullptr, &size); + test_error( + error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR size\n"); + + capabilities.resize(size / sizeof(cl_uint)); + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, size, + capabilities.data(), nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR\n"); + + return CL_SUCCESS; +} + +static int findRequirements(cl_device_id device, + std::vector& 
extendedInstructionSets, + std::vector& extensions, + std::vector& capabilities) +{ + cl_int error = CL_SUCCESS; + + auto version = get_device_cl_version(device); + auto ilVersions = get_device_il_version_string(device); + + // If no SPIR-V versions are supported, there are no requirements. + if (ilVersions.find("SPIR-V") == std::string::npos) + { + return CL_SUCCESS; + } + + cl_bool deviceImageSupport = CL_FALSE; + cl_bool deviceReadWriteImageSupport = CL_FALSE; + cl_bool deviceSubGroupsSupport = CL_FALSE; + cl_bool deviceGenericAddressSpaceSupport = CL_FALSE; + cl_bool deviceWorkGroupCollectiveFunctionsSupport = CL_FALSE; + cl_bool devicePipeSupport = CL_FALSE; + cl_bool deviceDeviceEnqueueSupport = CL_FALSE; + cl_device_integer_dot_product_capabilities_khr + deviceIntegerDotProductCapabilities = 0; + cl_device_fp_atomic_capabilities_ext deviceFp32AtomicCapabilities = 0; + cl_device_fp_atomic_capabilities_ext deviceFp16AtomicCapabilities = 0; + cl_device_fp_atomic_capabilities_ext deviceFp64AtomicCapabilities = 0; + + error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(deviceImageSupport), &deviceImageSupport, + nullptr); + test_error(error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE_SUPPORT\n"); + + if (version >= Version(2, 0)) + { + cl_uint deviceMaxReadWriteImageArgs = 0; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, + sizeof(deviceMaxReadWriteImageArgs), + &deviceMaxReadWriteImageArgs, nullptr); + test_error( + error, + "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n"); + + deviceReadWriteImageSupport = + deviceMaxReadWriteImageArgs != 0 ? 
CL_TRUE : CL_FALSE; + } + + if (version >= Version(2, 1)) + { + cl_uint deviceMaxNumSubGroups = 0; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(deviceMaxNumSubGroups), + &deviceMaxNumSubGroups, nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_MAX_NUM_SUB_GROUPS\n"); + + deviceSubGroupsSupport = + deviceMaxNumSubGroups != 0 ? CL_TRUE : CL_FALSE; + } + else if (is_extension_available(device, "cl_khr_subgroups")) + { + deviceSubGroupsSupport = CL_TRUE; + } + + if (version >= Version(3, 0)) + { + error = clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, + sizeof(deviceGenericAddressSpaceSupport), + &deviceGenericAddressSpaceSupport, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT\n"); + + error = clGetDeviceInfo( + device, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, + sizeof(deviceWorkGroupCollectiveFunctionsSupport), + &deviceWorkGroupCollectiveFunctionsSupport, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT\n"); + + error = clGetDeviceInfo(device, CL_DEVICE_PIPE_SUPPORT, + sizeof(devicePipeSupport), &devicePipeSupport, + nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_PIPE_SUPPORT\n"); + + cl_device_device_enqueue_capabilities deviceDeviceEnqueueCapabilities = + 0; + error = clGetDeviceInfo(device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, + sizeof(deviceDeviceEnqueueCapabilities), + &deviceDeviceEnqueueCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES\n"); + + deviceDeviceEnqueueSupport = + deviceDeviceEnqueueCapabilities != 0 ? 
CL_TRUE : CL_FALSE; + } + else if (version >= Version(2, 0)) + { + deviceGenericAddressSpaceSupport = CL_TRUE; + deviceWorkGroupCollectiveFunctionsSupport = CL_TRUE; + devicePipeSupport = CL_TRUE; + deviceDeviceEnqueueSupport = CL_TRUE; + } + + if (is_extension_available(device, "cl_khr_integer_dot_product")) + { + error = clGetDeviceInfo(device, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + sizeof(deviceIntegerDotProductCapabilities), + &deviceIntegerDotProductCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR\n"); + } + + if (is_extension_available(device, "cl_ext_float_atomics")) + { + error = + clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(deviceFp32AtomicCapabilities), + &deviceFp32AtomicCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT\n"); + + error = + clGetDeviceInfo(device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(deviceFp16AtomicCapabilities), + &deviceFp16AtomicCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT\n"); + + error = + clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(deviceFp64AtomicCapabilities), + &deviceFp64AtomicCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT\n"); + } + + // Required. + extendedInstructionSets.push_back("OpenCL.std"); + + capabilities.push_back(spv::CapabilityAddresses); + capabilities.push_back(spv::CapabilityFloat16Buffer); + capabilities.push_back(spv::CapabilityInt16); + capabilities.push_back(spv::CapabilityInt8); + capabilities.push_back(spv::CapabilityKernel); + capabilities.push_back(spv::CapabilityLinkage); + capabilities.push_back(spv::CapabilityVector16); + + // Required for FULL_PROFILE devices, or devices supporting + // cles_khr_int64. 
+ if (gHasLong) + { + capabilities.push_back(spv::CapabilityInt64); + } + + // Required for devices supporting images. + if (deviceImageSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityImage1D); + capabilities.push_back(spv::CapabilityImageBasic); + capabilities.push_back(spv::CapabilityImageBuffer); + capabilities.push_back(spv::CapabilityLiteralSampler); + capabilities.push_back(spv::CapabilitySampled1D); + capabilities.push_back(spv::CapabilitySampledBuffer); + } + + // Required for devices supporting SPIR-V 1.6. + if (ilVersions.find("SPIR-V_1.6") != std::string::npos) + { + capabilities.push_back(spv::CapabilityUniformDecoration); + } + + // Required for devices supporting read-write images. + if (deviceReadWriteImageSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityImageReadWrite); + } + + // Required for devices supporting the generic address space. + if (deviceGenericAddressSpaceSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityGenericPointer); + } + + // Required for devices supporting sub-groups or work-group collective + // functions. + if (deviceSubGroupsSupport == CL_TRUE + || deviceWorkGroupCollectiveFunctionsSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityGroups); + } + + // Required for devices supporting pipes. + if (devicePipeSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityPipes); + } + + // Required for devices supporting device-side enqueue. + if (deviceDeviceEnqueueSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityDeviceEnqueue); + } + + // Required for devices supporting SPIR-V 1.1 and OpenCL 2.2. + if (ilVersions.find("SPIR-V_1.1") != std::string::npos + && version == Version(2, 2)) + { + capabilities.push_back(spv::CapabilityPipeStorage); + } + + // Required for devices supporting SPIR-V 1.1 and either OpenCL 2.2 or + // OpenCL 3.0 devices supporting sub-groups. 
+ if (ilVersions.find("SPIR-V_1.1") != std::string::npos + && (version == Version(2, 2) + || (version >= Version(3, 0) && deviceSubGroupsSupport == CL_TRUE))) + { + capabilities.push_back(spv::CapabilitySubgroupDispatch); + } + + // Required for devices supporting cl_khr_expect_assume. + if (is_extension_available(device, "cl_khr_expect_assume")) + { + extensions.push_back("SPV_KHR_expect_assume"); + capabilities.push_back(spv::CapabilityExpectAssumeKHR); + } + + // Required for devices supporting cl_khr_extended_bit_ops. + if (is_extension_available(device, "cl_khr_extended_bit_ops")) + { + extensions.push_back("SPV_KHR_bit_instructions"); + capabilities.push_back(spv::CapabilityBitInstructions); + } + + // Required for devices supporting half-precision floating-point + // (cl_khr_fp16). + if (is_extension_available(device, "cl_khr_fp16")) + { + capabilities.push_back(spv::CapabilityFloat16); + } + + // Required for devices supporting double-precision floating-point + // (cl_khr_fp64). + if (is_extension_available(device, "cl_khr_fp64")) + { + capabilities.push_back(spv::CapabilityFloat64); + } + + // Required for devices supporting 64-bit atomics + // (cl_khr_int64_base_atomics or cl_khr_int64_extended_atomics). + if (is_extension_available(device, "cl_khr_int64_base_atomics") + || is_extension_available(device, "cl_khr_int64_extended_atomics")) + { + capabilities.push_back(spv::CapabilityInt64Atomics); + } + + // Required for devices supporting cl_khr_integer_dot_product. + if (is_extension_available(device, "cl_khr_integer_dot_product")) + { + extensions.push_back("SPV_KHR_integer_dot_product"); + capabilities.push_back(spv::CapabilityDotProduct); + capabilities.push_back(spv::CapabilityDotProductInput4x8BitPacked); + } + + // Required for devices supporting cl_khr_integer_dot_product and + // CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR. 
+ if (is_extension_available(device, "cl_khr_integer_dot_product") + && (deviceIntegerDotProductCapabilities + & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)) + { + capabilities.push_back(spv::CapabilityDotProductInput4x8Bit); + } + + // Required for devices supporting cl_khr_kernel_clock. + if (is_extension_available(device, "cl_khr_kernel_clock")) + { + extensions.push_back("SPV_KHR_shader_clock"); + capabilities.push_back(spv::CapabilityShaderClockKHR); + } + + // Required for devices supporting both cl_khr_mipmap_image and + // cl_khr_mipmap_image_writes. + if (is_extension_available(device, "cl_khr_mipmap_image") + && is_extension_available(device, "cl_khr_mipmap_image_writes")) + { + capabilities.push_back(spv::CapabilityImageMipmap); + } + + // Required for devices supporting cl_khr_spirv_extended_debug_info. + if (is_extension_available(device, "cl_khr_spirv_extended_debug_info")) + { + extendedInstructionSets.push_back("OpenCL.DebugInfo.100"); + } + + // Required for devices supporting cl_khr_spirv_linkonce_odr. + if (is_extension_available(device, "cl_khr_spirv_linkonce_odr")) + { + extensions.push_back("SPV_KHR_linkonce_odr"); + } + + // Required for devices supporting + // cl_khr_spirv_no_integer_wrap_decoration. + if (is_extension_available(device, + "cl_khr_spirv_no_integer_wrap_decoration")) + { + extensions.push_back("SPV_KHR_no_integer_wrap_decoration"); + } + + // Required for devices supporting cl_khr_subgroup_ballot. + if (is_extension_available(device, "cl_khr_subgroup_ballot")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformBallot); + } + + // Required for devices supporting cl_khr_subgroup_clustered_reduce. + if (is_extension_available(device, "cl_khr_subgroup_clustered_reduce")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformClustered); + } + + // Required for devices supporting cl_khr_subgroup_named_barrier. 
+ if (is_extension_available(device, "cl_khr_subgroup_named_barrier")) + { + capabilities.push_back(spv::CapabilityNamedBarrier); + } + + // Required for devices supporting + // cl_khr_subgroup_non_uniform_arithmetic. + if (is_extension_available(device, + "cl_khr_subgroup_non_uniform_arithmetic")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformArithmetic); + } + + // Required for devices supporting cl_khr_subgroup_non_uniform_vote. + if (is_extension_available(device, "cl_khr_subgroup_non_uniform_vote")) + { + capabilities.push_back(spv::CapabilityGroupNonUniform); + capabilities.push_back(spv::CapabilityGroupNonUniformVote); + } + + // Required for devices supporting cl_khr_subgroup_rotate. + if (is_extension_available(device, "cl_khr_subgroup_rotate")) + { + extensions.push_back("SPV_KHR_subgroup_rotate"); + capabilities.push_back(spv::CapabilityGroupNonUniformRotateKHR); + } + + // Required for devices supporting cl_khr_subgroup_shuffle. + if (is_extension_available(device, "cl_khr_subgroup_shuffle")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformShuffle); + } + + // Required for devices supporting cl_khr_subgroup_shuffle_relative. + if (is_extension_available(device, "cl_khr_subgroup_shuffle_relative")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformShuffleRelative); + } + + // Required for devices supporting cl_khr_work_group_uniform_arithmetic. + if (is_extension_available(device, "cl_khr_work_group_uniform_arithmetic")) + { + extensions.push_back("SPV_KHR_uniform_group_instructions"); + capabilities.push_back(spv::CapabilityGroupUniformArithmeticKHR); + } + + // Required for devices supporting cl_ext_float_atomics and fp32 atomic + // adds. 
+ if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat32AddEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp32 atomic + // min and max. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat32MinMaxEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp16 atomic + // adds. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp16AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))) + { + extensions.push_back("SPV_EXT_shader_atomic_float16_add"); + capabilities.push_back(spv::CapabilityAtomicFloat16AddEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp16 atomic + // min and max. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp16AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat16MinMaxEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp64 atomic + // adds. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat64AddEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp64 atomic + // min and max. 
+ if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat64MinMaxEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp16, fp32, + // or fp64 atomic min or max. + if (is_extension_available(device, "cl_ext_float_atomics") + && ((deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)) + || (deviceFp16AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)) + || (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))) + { + extensions.push_back("SPV_EXT_shader_atomic_float_min_max"); + } + + // Required for devices supporting cl_ext_float_atomics and fp32 or fp64 + // atomic adds. + if (is_extension_available(device, "cl_ext_float_atomics") + && ((deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)) + || (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))) + { + extensions.push_back("SPV_EXT_shader_atomic_float_add"); + } + + // Required for devices supporting cl_intel_bfloat16_conversions. + if (is_extension_available(device, "cl_intel_bfloat16_conversions")) + { + extensions.push_back("SPV_INTEL_bfloat16_conversion"); + capabilities.push_back(spv::CapabilityBFloat16ConversionINTEL); + } + + // Required for devices supporting + // cl_intel_spirv_device_side_avc_motion_estimation. 
+ if (is_extension_available( + device, "cl_intel_spirv_device_side_avc_motion_estimation")) + { + extensions.push_back("SPV_INTEL_device_side_avc_motion_estimation"); + capabilities.push_back( + spv::CapabilitySubgroupAvcMotionEstimationChromaINTEL); + capabilities.push_back(spv::CapabilitySubgroupAvcMotionEstimationINTEL); + capabilities.push_back( + spv::CapabilitySubgroupAvcMotionEstimationIntraINTEL); + } + + // Required for devices supporting cl_intel_spirv_media_block_io. + if (is_extension_available(device, "cl_intel_spirv_media_block_io")) + { + extensions.push_back("SPV_INTEL_media_block_io"); + capabilities.push_back(spv::CapabilitySubgroupImageMediaBlockIOINTEL); + } + + // Required for devices supporting cl_intel_spirv_subgroups. + if (is_extension_available(device, "cl_intel_spirv_subgroups")) + { + extensions.push_back("SPV_INTEL_subgroups"); + capabilities.push_back(spv::CapabilitySubgroupBufferBlockIOINTEL); + capabilities.push_back(spv::CapabilitySubgroupImageBlockIOINTEL); + capabilities.push_back(spv::CapabilitySubgroupShuffleINTEL); + } + + // Required for devices supporting cl_intel_split_work_group_barrier. + if (is_extension_available(device, "cl_intel_split_work_group_barrier")) + { + extensions.push_back("SPV_INTEL_split_barrier"); + capabilities.push_back(spv::CapabilitySplitBarrierINTEL); + } + + // Required for devices supporting cl_intel_subgroup_buffer_prefetch. 
+ if (is_extension_available(device, "cl_intel_subgroup_buffer_prefetch")) + { + extensions.push_back("SPV_INTEL_subgroup_buffer_prefetch"); + capabilities.push_back(spv::CapabilitySubgroupBufferPrefetchINTEL); + } + + return CL_SUCCESS; +} + +REGISTER_TEST(spirv_query_requirements) +{ + if (!is_extension_available(device, "cl_khr_spirv_queries")) + { + log_info("cl_khr_spirv_queries is not supported; skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error; + + std::vector queriedExtendedInstructionSets; + std::vector queriedExtensions; + std::vector queriedCapabilities; + + error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions, + queriedCapabilities); + test_error_fail(error, "Unable to perform SPIR-V queries"); + + std::vector requiredExtendedInstructionSets; + std::vector requiredExtensions; + std::vector requiredCapabilities; + error = findRequirements(device, requiredExtendedInstructionSets, + requiredExtensions, requiredCapabilities); + test_error_fail(error, "Unable to find SPIR-V requirements"); + + for (auto check : requiredExtendedInstructionSets) + { + auto cmp = [=](const char* queried) { + return strcmp(check, queried) == 0; + }; + auto it = std::find_if(queriedExtendedInstructionSets.begin(), + queriedExtendedInstructionSets.end(), cmp); + if (it == queriedExtendedInstructionSets.end()) + { + test_fail("Missing required extended instruction set: %s\n", check); + } + } + + for (auto check : requiredExtensions) + { + auto cmp = [=](const char* queried) { + return strcmp(check, queried) == 0; + }; + auto it = std::find_if(queriedExtensions.begin(), + queriedExtensions.end(), cmp); + if (it == queriedExtensions.end()) + { + test_fail("Missing required extension: %s\n", check); + } + } + + for (auto check : requiredCapabilities) + { + if (std::find(queriedCapabilities.begin(), queriedCapabilities.end(), + check) + == queriedCapabilities.end()) + { + test_fail( + "Missing required capability: %s\n", + 
spv::CapabilityToString(static_cast(check))); + } + } + + // Find any extraneous capabilities (informational): + for (auto check : queriedCapabilities) + { + if (std::find(requiredCapabilities.begin(), requiredCapabilities.end(), + check) + == requiredCapabilities.end()) + { + log_info( + "Found non-required capability: %s\n", + spv::CapabilityToString(static_cast(check))); + } + } + + return TEST_PASS; +} + +REGISTER_TEST(spirv_query_dependencies) +{ + if (!is_extension_available(device, "cl_khr_spirv_queries")) + { + log_info("cl_khr_spirv_queries is not supported; skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error; + + std::vector queriedExtendedInstructionSets; + std::vector queriedExtensions; + std::vector queriedCapabilities; + + error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions, + queriedCapabilities); + test_error_fail(error, "Unable to perform SPIR-V queries"); + + struct CapabilityDependencies + { + std::vector extensions; + std::string version; + }; + + std::map dependencies; + +#define SPIRV_CAPABILITY_VERSION_DEPENDENCY(_cap, _ver) \ + dependencies[spv::Capability##_cap].version = _ver; +#define SPIRV_CAPABILITY_EXTENSION_DEPENDENCY(_cap, _ext) \ + dependencies[spv::Capability##_cap].extensions.push_back(_ext); +#include "spirv_capability_deps.def" + + // For each queried SPIR-V capability, ensure that either that any SPIR-V + // version dependencies or SPIR-V extension dependencies are satisfied. 
+ + for (auto check : queriedCapabilities) + { + // Log and skip any unknown capabilities + auto it = dependencies.find(static_cast(check)); + if (it == dependencies.end()) + { + log_info( + "No known dependencies for queried capability %s!\n", + spv::CapabilityToString(static_cast(check))); + continue; + } + + // Check if a SPIR-V version dependency is satisfied + const auto& version_dep = it->second.version; + if (!version_dep.empty() + && is_spirv_version_supported(device, version_dep)) + { + continue; + } + + // Check if a SPIR-V extension dependency is satisfied + bool found = false; + for (const auto& extension_dep : it->second.extensions) + { + if (std::find(queriedExtensions.begin(), queriedExtensions.end(), + extension_dep) + != queriedExtensions.end()) + { + found = true; + break; + } + } + if (found) + { + continue; + } + + // If we get here then the capability has an unsatisfied dependency. + log_error("Couldn't find a dependency for queried capability %s!\n", + spv::CapabilityToString(static_cast(check))); + if (!version_dep.empty()) + { + log_error("Checked for SPIR-V version %s.\n", version_dep.c_str()); + } + for (const auto& extension_dep : it->second.extensions) + { + log_error("Checked for SPIR-V extension %s.n", + extension_dep.c_str()); + } + return TEST_FAIL; + } + + return TEST_PASS; +} From 9acbb240649df4c29c990a539f3e3ce0cda7fda4 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:37:37 +0100 Subject: [PATCH 38/61] Fix ccache in CI (#2492) sccache emits a warning message after every build that it has failed to save a build cache. Additionally the stats are all zeroes, no cache hits (or misses). The "Caches" tab under "Actions" does not have any cached build artifacts which confirms that nothing is being saved. Fix by passing the correct launcher options directly to CMake instead of wrapping them in `CMAKE_CACHE_OPTIONS`. 
Signed-off-by: Ahmed Hesham --- .github/workflows/presubmit.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 594dc6f5..39bfbba3 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -162,7 +162,8 @@ jobs: fi cmake .. -G Ninja \ -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ - -DCMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ -DCL_INCLUDE_DIR='${{ github.workspace }}'/OpenCL-Headers \ -DSPIRV_INCLUDE_DIR='${{ github.workspace }}'/SPIRV-Headers \ -DCL_LIB_DIR='${{ github.workspace }}'/OpenCL-ICD-Loader/build \ From e778fa83808cea1a5b3a960f1c01b4d99caf4117 Mon Sep 17 00:00:00 2001 From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:37:47 +0100 Subject: [PATCH 39/61] Update D3D10 and D3D11 interop tests (#2479) Both tests depend on a very old DirectX SDK (August 2009) and expect it to be extracted to an {NV_TOOLS} environment variable. They additionally require defining {ARCH} as a CMake configuration option, which is not needed. Update both projects to use DirectX libraries provided by the Windows SDK and drop the unneeded configuration options.
--------- Signed-off-by: Ahmed Hesham --- .github/workflows/presubmit.yml | 7 ++++--- test_conformance/d3d10/CMakeLists.txt | 27 ++++----------------------- test_conformance/d3d11/CMakeLists.txt | 27 ++++----------------------- 3 files changed, 12 insertions(+), 49 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 39bfbba3..43310277 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -112,7 +112,7 @@ jobs: if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }} shell: bash run: | - echo "CMAKE_CONFIG_ARGS_ANDROID=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI} -DANDROID_PLATFORM=${ANDROID_PLATFORM}" >> $GITHUB_ENV + echo "CMAKE_ADDITIONAL_CONFIG_ARGS=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI} -DANDROID_PLATFORM=${ANDROID_PLATFORM}" >> $GITHUB_ENV - name: Fetch and build OpenCL ICD Loader shell: bash run: | @@ -124,7 +124,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DOPENCL_ICD_LOADER_HEADERS_DIR='${{ github.workspace }}'/OpenCL-Headers/ \ - ${CMAKE_CONFIG_ARGS_ANDROID} + ${CMAKE_ADDITIONAL_CONFIG_ARGS} cmake --build . --parallel - name: Fetch Vulkan Headers shell: bash @@ -154,6 +154,7 @@ jobs: cd build if [[ ${RUNNER_OS} == "Windows" ]]; then CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL" + CMAKE_ADDITIONAL_CONFIG_ARGS="-DD3D10_IS_SUPPORTED=ON -DD3D11_IS_SUPPORTED=ON" else CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL" if [[ '${{ matrix.arch }}' != android-* ]]; then @@ -175,7 +176,7 @@ jobs: -DVULKAN_IS_SUPPORTED=ON \ -DVULKAN_INCLUDE_DIR='${{ github.workspace }}'/Vulkan-Headers/include/ \ -DVULKAN_LIB_DIR='${{ github.workspace }}'/Vulkan-Loader/build/loader/ \ - ${CMAKE_CONFIG_ARGS_ANDROID} + ${CMAKE_ADDITIONAL_CONFIG_ARGS} cmake --build . 
--parallel formatcheck: name: Check code format diff --git a/test_conformance/d3d10/CMakeLists.txt b/test_conformance/d3d10/CMakeLists.txt index 385ea86f..46387f51 100644 --- a/test_conformance/d3d10/CMakeLists.txt +++ b/test_conformance/d3d10/CMakeLists.txt @@ -1,22 +1,4 @@ if(WIN32) - -set(D3D10_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include) - -if(${ARCH} STREQUAL "i686") -set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86) -endif(${ARCH} STREQUAL "i686") - -if(${ARCH} STREQUAL "x86_64") -set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64) -endif(${ARCH} STREQUAL "x86_64") - -list(APPEND CLConform_INCLUDE_DIR ${D3D10_INCLUDE_DIR}) -include_directories (${CLConform_SOURCE_DIR}/test_common/harness - ${CLConform_INCLUDE_DIR} ) -link_directories(${CL_LIB_DIR}, ${D3D10_LIB_DIR}) - -list(APPEND CLConform_LIBRARIES d3d10 dxgi) - set(MODULE_NAME D3D10) set(${MODULE_NAME}_SOURCES @@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES harness.cpp ) -set_source_files_properties( - ${MODULE_NAME}_SOURCES - PROPERTIES LANGUAGE CXX) +list(APPEND CLConform_LIBRARIES d3d10 dxgi) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include(../CMakeCommon.txt) -endif(WIN32) +else() +message(STATUS "D3D10 tests are only supported on Windows.") +endif() diff --git a/test_conformance/d3d11/CMakeLists.txt b/test_conformance/d3d11/CMakeLists.txt index 14a378d7..b9b81b56 100644 --- a/test_conformance/d3d11/CMakeLists.txt +++ b/test_conformance/d3d11/CMakeLists.txt @@ -1,22 +1,4 @@ if(WIN32) - -set(D3D11_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include) - -if(${ARCH} STREQUAL "i686") -set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86) -endif(${ARCH} STREQUAL "i686") - -if(${ARCH} STREQUAL "x86_64") -set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64) -endif(${ARCH} STREQUAL "x86_64") - -list(APPEND CLConform_INCLUDE_DIR ${D3D11_INCLUDE_DIR}) -include_directories (${CLConform_SOURCE_DIR}/test_common/harness - ${CLConform_INCLUDE_DIR} ) 
-link_directories(${CL_LIB_DIR}, ${D3D11_LIB_DIR}) - -list(APPEND CLConform_LIBRARIES d3d11 dxgi) - set(MODULE_NAME D3D11) set(${MODULE_NAME}_SOURCES @@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES harness.cpp ) -set_source_files_properties( - ${MODULE_NAME}_SOURCES - PROPERTIES LANGUAGE CXX) +list(APPEND CLConform_LIBRARIES d3d11 dxgi) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include(../CMakeCommon.txt) -endif(WIN32) +else() +message(STATUS "D3D11 tests are only supported on Windows.") +endif() From 7f01a861d86e35883d5cac0f080921a8bb84f9f0 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 19 Aug 2025 18:47:20 +0200 Subject: [PATCH 40/61] Add feature_macro test for cl_ext_image_unorm_int_2_101010 (#2420) --- .../compiler/test_feature_macro.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test_conformance/compiler/test_feature_macro.cpp b/test_conformance/compiler/test_feature_macro.cpp index 93776a8b..f7a53628 100644 --- a/test_conformance/compiler/test_feature_macro.cpp +++ b/test_conformance/compiler/test_feature_macro.cpp @@ -656,6 +656,32 @@ static int test_feature_macro_integer_dot_product_input_4x8bit( compiler_status, supported); } +static int test_feature_macro_ext_image_unorm_int_2_101010( + cl_device_id deviceID, cl_context context, std::string test_macro_name, + cl_bool& supported) +{ + cl_int error = TEST_FAIL; + cl_bool api_status = CL_TRUE; + cl_bool compiler_status; + log_info("\n%s ...\n", test_macro_name.c_str()); + + if (!is_extension_available(deviceID, "cl_ext_image_unorm_int_2_101010")) + { + supported = false; + return TEST_PASS; + } + + error = check_compiler_feature_info(deviceID, context, test_macro_name, + compiler_status); + if (error != CL_SUCCESS) + { + return error; + } + + return feature_macro_verify_results(test_macro_name, api_status, + compiler_status, supported); +} + static int test_feature_macro_int64(cl_device_id deviceID, cl_context context, std::string test_macro_name, cl_bool& supported) 
@@ -833,6 +859,7 @@ REGISTER_TEST_VERSION(features_macro, Version(3, 0)) NEW_FEATURE_MACRO_TEST(int64); NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit); NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed); + NEW_FEATURE_MACRO_TEST(ext_image_unorm_int_2_101010); error |= test_consistency_c_features_list(device, supported_features_vec); From cef3ef6b5953d1440271ed03436196b9a30355a6 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Tue, 19 Aug 2025 18:48:03 +0200 Subject: [PATCH 41/61] Update '-list' option (#2457) The '-list' option is used to print all sub-tests, but some tests do not support it at all, and those that do support it do not all display the list the same way, making it quite complicated for external tools to extract the list. This CL cleans up the usage so that tests: - Print the sub-tests list with either '-list' (to prevent breaking legacy usage) or '--list' (to match other options) - Do not print anything else when the option is used --- test_common/harness/parseParameters.cpp | 8 +++++ test_common/harness/parseParameters.h | 1 + test_common/harness/testHarness.cpp | 30 +++++++++++++----- .../contractions/contractions.cpp | 5 +++ .../conversions/test_conversions.cpp | 29 +++++++++++++++++ test_conformance/half/main.cpp | 21 ++++++++----- test_conformance/math_brute_force/main.cpp | 31 ++++++++++++------- test_conformance/mem_host_flags/main.cpp | 1 - test_conformance/printf/test_printf.cpp | 13 ++++---- test_conformance/select/test_select.cpp | 3 +- test_conformance/spir/main.cpp | 20 +++++++++--- test_conformance/spirv_new/main.cpp | 8 +++-- 12 files changed, 127 insertions(+), 43 deletions(-) diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp index 29499381..820814ec 100644 --- a/test_common/harness/parseParameters.cpp +++ b/test_common/harness/parseParameters.cpp @@ -35,6 +35,7 @@ std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM; bool gDisableSPIRVValidation = false; std::string gSPIRVValidator = 
DEFAULT_SPIRV_VALIDATOR; unsigned gNumWorkerThreads; +bool gListTests = false; void helpInfo() { @@ -49,6 +50,8 @@ void helpInfo() spir-v Use SPIR-V offline compilation --num-worker-threads Select parallel execution with the specified number of worker threads. + --list + List sub-tests For offline compilation (binary and spir-v modes) only: --compilation-cache-mode @@ -104,6 +107,11 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore) // option and print its own help. helpInfo(); } + else if (!strcmp(argv[i], "--list") || !strcmp(argv[i], "-list")) + { + delArg++; + gListTests = true; + } else if (!strcmp(argv[i], "--compilation-mode")) { delArg++; diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h index 437e12f9..685aef21 100644 --- a/test_common/harness/parseParameters.h +++ b/test_common/harness/parseParameters.h @@ -40,6 +40,7 @@ extern std::string gCompilationCachePath; extern std::string gCompilationProgram; extern bool gDisableSPIRVValidation; extern std::string gSPIRVValidator; +extern bool gListTests; extern int parseCustomParam(int argc, const char *argv[], const char *ignore = 0); diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 611d0b32..71430cd0 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -169,6 +170,19 @@ void version_expected_info(const char *test_name, const char *api_name, "reports %s version %s)\n", test_name, api_name, expected_version, api_name, device_version); } + +static void list_tests(int testNum, test_definition testList[]) +{ + std::set names; + for (int i = 0; i < testNum; i++) + { + names.insert(testList[i].name); + } + for (const auto &name : names) + { + log_info("\t%s\n", name.c_str()); + } +} int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, test_definition testList[], int 
forceNoContextCreation, @@ -258,10 +272,13 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, return EXIT_FAILURE; } - /* Special case: just list the tests */ - if ((argc > 1) - && (!strcmp(argv[1], "-list") || !strcmp(argv[1], "-h") - || !strcmp(argv[1], "--help"))) + if (gListTests) + { + list_tests(testNum, testList); + return EXIT_SUCCESS; + } + + if ((argc > 1) && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) { char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME"); @@ -284,10 +301,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, log_info("\n"); log_info("Test names:\n"); - for (int i = 0; i < testNum; i++) - { - log_info("\t%s\n", testList[i].name); - } + list_tests(testNum, testList); return EXIT_SUCCESS; } diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp index 2fdf0323..0c868764 100644 --- a/test_conformance/contractions/contractions.cpp +++ b/test_conformance/contractions/contractions.cpp @@ -284,6 +284,11 @@ int main( int argc, const char **argv ) static int ParseArgs( int argc, const char **argv ) { + if (gListTests) + { + return 0; + } + gArgList = (const char **)calloc( argc, sizeof( char*) ); if( NULL == gArgList ) diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 1712e099..5df5f0fd 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -112,6 +112,35 @@ int main(int argc, const char **argv) int error; argc = parseCustomParam(argc, argv); + if (gListTests) + { + for (unsigned dst = 0; dst < kTypeCount; dst++) + { + for (unsigned src = 0; src < kTypeCount; src++) + { + for (unsigned sat = 0; sat < 2; sat++) + { + // skip illegal saturated conversions to float type + if (gSaturationNames[sat] == std::string("_sat") + && (gTypeNames[dst] == std::string("float") + || gTypeNames[dst] == 
std::string("half") + || gTypeNames[dst] == std::string("double"))) + { + continue; + } + for (unsigned rnd = 0; rnd < kRoundingModeCount; rnd++) + { + vlog("\t%s\n", + (std::string(gTypeNames[dst]) + + gSaturationNames[sat] + gRoundingModeNames[rnd] + + "_" + gTypeNames[src]) + .c_str()); + } + } + } + } + return 0; + } if (argc == -1) { return 1; diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp index 743bc45c..9d9211c7 100644 --- a/test_conformance/half/main.cpp +++ b/test_conformance/half/main.cpp @@ -83,13 +83,6 @@ int main (int argc, const char **argv ) if( (error = ParseArgs( argc, argv )) ) goto exit; - if (gIsEmbedded) { - vlog( "\tProfile: Embedded\n" ); - }else - { - vlog( "\tProfile: Full\n" ); - } - fflush( stdout ); error = runTestHarnessWithCheck( argCount, argList, test_registry::getInstance().num_tests(), @@ -114,6 +107,10 @@ exit: static int ParseArgs( int argc, const char **argv ) { + if (gListTests) + { + return 0; + } int i; argList = (const char **)calloc(argc, sizeof(char *)); if( NULL == argList ) @@ -217,6 +214,16 @@ static int ParseArgs( int argc, const char **argv ) vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor); } + + if (gIsEmbedded) + { + vlog("\tProfile: Embedded\n"); + } + else + { + vlog("\tProfile: Full\n"); + } + return 0; } diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 008ab307..519d8b12 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -384,21 +384,24 @@ int main(int argc, const char *argv[]) error = ParseArgs(argc, argv); if (error) return error; - // This takes a while, so prevent the machine from going to sleep. - PreventSleep(); - atexit(ResumeSleep); + if (!gListTests) + { + // This takes a while, so prevent the machine from going to sleep. 
+ PreventSleep(); + atexit(ResumeSleep); - if (gSkipCorrectnessTesting) - vlog("*** Skipping correctness testing! ***\n\n"); - else if (gStopOnError) - vlog("Stopping at first error.\n"); + if (gSkipCorrectnessTesting) + vlog("*** Skipping correctness testing! ***\n\n"); + else if (gStopOnError) + vlog("Stopping at first error.\n"); - vlog(" \t "); - if (gWimpyMode) vlog(" "); - if (!gSkipCorrectnessTesting) vlog("\t max_ulps"); + vlog(" \t "); + if (gWimpyMode) vlog(" "); + if (!gSkipCorrectnessTesting) vlog("\t max_ulps"); - vlog("\n-------------------------------------------------------------------" - "----------------------------------------\n"); + vlog("\n---------------------------------------------------------------" + "--------------------------------------------\n"); + } gMTdata = MTdataHolder(gRandomSeed); @@ -425,6 +428,10 @@ int main(int argc, const char *argv[]) static int ParseArgs(int argc, const char **argv) { + if (gListTests) + { + return 0; + } // We only pass test names to runTestHarnessWithCheck, hence global command // line options defined by the harness cannot be used by the user. 
// To respect the implementation details of runTestHarnessWithCheck, diff --git a/test_conformance/mem_host_flags/main.cpp b/test_conformance/mem_host_flags/main.cpp index 0e9df56c..0b05ff16 100644 --- a/test_conformance/mem_host_flags/main.cpp +++ b/test_conformance/mem_host_flags/main.cpp @@ -31,7 +31,6 @@ int main(int argc, const char *argv[]) { - log_info("1st part, non gl-sharing objects...\n"); gTestRounding = true; return runTestHarness(argc, argv, test_registry::getInstance().num_tests(), test_registry::getInstance().definitions(), false, 0); diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index 38333175..9cd9db00 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -1101,20 +1101,19 @@ int main(int argc, const char* argv[]) argCount, argList, test_registry::getInstance().num_tests(), test_registry::getInstance().definitions(), true, 0, InitCL); - if(gQueue) + if (gQueue) { int error = clFinish(gQueue); - if (error) { + if (error) + { log_error("clFinish failed: %d\n", error); } + if (clReleaseCommandQueue(gQueue) != CL_SUCCESS) + log_error("clReleaseCommandQueue\n"); } - - if(clReleaseCommandQueue(gQueue)!=CL_SUCCESS) - log_error("clReleaseCommandQueue\n"); - if(clReleaseContext(gContext)!= CL_SUCCESS) + if (gContext && clReleaseContext(gContext) != CL_SUCCESS) log_error("clReleaseContext\n"); - free(argList); remove(gFileName); return err; diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 20f5bd5e..490fa8bc 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -630,7 +630,8 @@ int main(int argc, const char* argv[]) s_wimpy_mode = true; } - if (s_wimpy_mode) { + if (s_wimpy_mode && !gListTests) + { log_info("\n"); log_info("*** WARNING: Testing in Wimpy mode! ***\n"); log_info("*** Wimpy mode is not sufficient to verify correctness. 
***\n"); diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 95e8c00f..322e71bc 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -6713,6 +6713,14 @@ cl_device_id get_platform_device (cl_device_type device_type, cl_uint choosen_de return devices[choosen_device_index]; } +static void ListTests() +{ + for (unsigned int i = 0; i < (sizeof(spir_suites) / sizeof(sub_suite)); i++) + { + log_info("\t%s\n", spir_suites[i].name); + } +} + /** Parses the command line parameters and set the @@ -6761,7 +6769,7 @@ static int ParseCommandLine (int argc, const char *argv[], /* Process the command line arguments */ /* Special case: just list the tests */ - if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" ))) + if ((argc > 1) && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) { log_info( "Usage: %s [] [pid] [id] [] [w32] [no-unzip]\n", argv[0] ); log_info( "\t\tOne or more of: (default all)\n"); @@ -6771,10 +6779,12 @@ static int ParseCommandLine (int argc, const char *argv[], log_info( "\tw32\t\tIndicates device address bits is 32.\n" ); log_info( "\tno-unzip\t\tDo not extract test files from Zip; use existing.\n" ); - for( unsigned int i = 0; i < (sizeof(spir_suites) / sizeof(sub_suite)); i++ ) - { - log_info( "\t\t%s\n", spir_suites[i].name ); - } + ListTests(); + return 0; + } + else if (!strcmp(argv[1], "--list") || !strcmp(argv[1], "-list")) + { + ListTests(); return 0; } diff --git a/test_conformance/spirv_new/main.cpp b/test_conformance/spirv_new/main.cpp index 98ce18e8..b5421f92 100644 --- a/test_conformance/spirv_new/main.cpp +++ b/test_conformance/spirv_new/main.cpp @@ -216,6 +216,7 @@ int main(int argc, const char *argv[]) { gReSeed = 1; bool modifiedSpvBinariesPath = false; + bool listTests = false; for (int i = 0; i < argc; ++i) { int argsRemoveNum = 0; if (argv[i] == spvBinariesPathArg) { @@ -241,9 +242,12 @@ int main(int argc, 
const char *argv[]) argc -= argsRemoveNum; --i; } + listTests |= (argv[i] == std::string("--list") + || argv[i] == std::string("-list")); } - if (modifiedSpvBinariesPath == false) { - printUsage(); + if (modifiedSpvBinariesPath == false && !listTests) + { + printUsage(); } return runTestHarnessWithCheck( From e51fadbbb1136d13f0e52bc4848519c4d986e472 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 19 Aug 2025 19:06:22 +0200 Subject: [PATCH 42/61] Properly handle NaN when comparing images in Vulkan interop testing (#2484) I was considering reusing other code where NaN gets taken into account, but all the other places are a mess. The kernel read write tests are just doing it within loops. Other places only compare raw values as outside of kernel functions it seems to be fine to expect NaN to not mess up results. However the vulkan interop testing does run kernels and does operate on NaN float values, so we need to special case this there. --- .../vulkan_wrapper/opencl_vulkan_wrapper.cpp | 2 +- .../vulkan_wrapper/opencl_vulkan_wrapper.hpp | 2 + .../vulkan/test_vulkan_interop_image.cpp | 43 +++++++++++++++++-- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp index f4245703..ba1b7445 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp @@ -863,7 +863,7 @@ clExternalMemoryImage::clExternalMemoryImage( size_t clImageFormatSize; cl_image_desc image_desc; memset(&image_desc, 0x0, sizeof(cl_image_desc)); - cl_image_format img_format = { 0 }; + img_format = { 0 }; const VkImageCreateInfo VulkanImageCreateInfo = image2D.getVkImageCreateInfo(); diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp index f9a305e1..0bf89322 100644 --- 
a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp @@ -106,6 +106,7 @@ protected: cl_mem m_externalMemory; int fd; void *handle; + cl_image_format img_format; clExternalMemoryImage(); public: @@ -117,6 +118,7 @@ public: cl_device_id deviceId); virtual ~clExternalMemoryImage(); cl_mem getExternalMemoryImage(); + cl_image_format getImageFormat() { return img_format; }; }; class clExternalSemaphore { diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index 0c1887aa..0dc2fa8a 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -17,6 +17,7 @@ #include #include #include "harness/errorHelpers.h" +#include "harness/imageHelpers.h" #include "harness/os_helpers.h" #include @@ -136,6 +137,38 @@ const uint32_t num2DImagesList[] = { 1, 2, 4 }; const uint32_t widthList[] = { 4, 64, 183, 1024 }; const uint32_t heightList[] = { 4, 64, 365 }; +bool memcmp_images(const void *a, const void *b, size_t size, + cl_image_format format) +{ + if (format.image_channel_data_type == CL_FLOAT) + { + const float *a_float = static_cast(a); + const float *b_float = static_cast(b); + return !std::equal(a_float, a_float + size / sizeof(*a_float), b_float, + b_float + size / sizeof(*b_float), + [](float a, float b) { + if (isnan(a) && isnan(b)) return true; + return a == b; + }); + } + else if (format.image_channel_data_type == CL_HALF_FLOAT) + { + const cl_half *a_half = static_cast(a); + const cl_half *b_half = static_cast(b); + return !std::equal(a_half, a_half + size / sizeof(*a_half), b_half, + b_half + size / sizeof(*b_half), + [](cl_half a, cl_half b) { + if (is_half_nan(a) && is_half_nan(b)) + return true; + return a == b; + }); + } + else + { + return memcmp(a, b, size) != 0; + } +} + const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float, 
cl_kernel kernel_signed, cl_kernel kernel_unsigned) @@ -744,8 +777,9 @@ int run_test_with_two_queue( "clEnqueueReadImage failed with" "error\n"); - if (memcmp(srcBufferPtr, dstBufferPtr, - srcBufSize)) + if (memcmp_images( + srcBufferPtr, dstBufferPtr, srcBufSize, + externalMemory2[i]->getImageFormat())) { log_info("Source and destination buffers " "don't match\n"); @@ -1296,8 +1330,9 @@ int run_test_with_one_queue( "clEnqueueReadImage failed with" "error\n"); - if (memcmp(srcBufferPtr, dstBufferPtr, - srcBufSize)) + if (memcmp_images( + srcBufferPtr, dstBufferPtr, srcBufSize, + externalMemory2[i]->getImageFormat())) { log_info("Source and destination buffers " "don't match\n"); From 909095f60a45d2ea131586a8a06411b3072a1bdd Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 19 Aug 2025 19:06:37 +0200 Subject: [PATCH 43/61] test_vulkan: Fix some VVL errors (#2486) Fixes the following VVL errors: ``` Validation Error: [ VUID-VkExternalSemaphoreProperties-sType-sType ] | MessageID = 0x160be1e8 vkGetPhysicalDeviceExternalSemaphorePropertiesKHR(): pExternalSemaphoreProperties->sType must be VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES. The Vulkan spec states: sType must be VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES (https://docs.vulkan.org/spec/latest/chapters/capabilities.html#VUID-VkExternalSemaphoreProperties-sType-sType) ``` ``` Validation Error: [ VUID-VkImportSemaphoreFdInfoKHR-handleType-07307 ] | MessageID = 0x1b609443 vkImportSemaphoreFdKHR(): pImportSemaphoreFdInfo->handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT so VK_SEMAPHORE_IMPORT_TEMPORARY_BIT must be set, but flags is VkSemaphoreImportFlags(0). 
The Vulkan spec states: If handleType refers to a handle type with copy payload transference semantics, flags must contain VK_SEMAPHORE_IMPORT_TEMPORARY_BIT (https://docs.vulkan.org/spec/latest/chapters/synchronization.html#VUID-VkImportSemaphoreFdInfoKHR-handleType-07307) ``` ``` Validation Error: [ VUID-VkPhysicalDeviceExternalBufferInfo-None-09500 ] | MessageID = 0x8434cf vkGetPhysicalDeviceExternalBufferPropertiesKHR(): pExternalBufferInfo->usage is zero. The Vulkan spec states: If the pNext chain does not include a VkBufferUsageFlags2CreateInfo structure, usage must not be 0 (https://docs.vulkan.org/spec/latest/chapters/capabilities.html#VUID-VkPhysicalDeviceExternalBufferInfo-None-09500) ``` --- .../common/vulkan_wrapper/opencl_vulkan_wrapper.cpp | 2 +- test_conformance/common/vulkan_wrapper/vulkan_utility.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp index ba1b7445..4d62a82f 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp @@ -1233,7 +1233,7 @@ int clExternalExportableSemaphore::signal(cl_command_queue cmd_queue) import.fd = fd; import.pNext = nullptr; import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - import.flags = 0; + import.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT; VkResult res = vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import); diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp index 75aa536d..3773944b 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp @@ -243,6 +243,8 @@ getSupportedVulkanExternalMemoryHandleTypeList( VkPhysicalDeviceExternalBufferInfo buffer_info = {}; buffer_info.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO; buffer_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; VkExternalBufferProperties buffer_properties = {}; buffer_properties.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES; @@ -307,7 +309,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList(const VulkanDevice &vkDevice) VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, nullptr, handle_type.vk_type }; - VkExternalSemaphoreProperties query_result = {}; + VkExternalSemaphoreProperties query_result = { + VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES + }; vkGetPhysicalDeviceExternalSemaphorePropertiesKHR( vkDevice.getPhysicalDevice(), &handle_query, &query_result); if (query_result.externalSemaphoreFeatures From 0633af21c15319653808cd07803eccaf5f6014e9 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 20 Aug 2025 20:21:46 +0200 Subject: [PATCH 44/61] mutable_command_info: remove unnecessary Skip function (#2498) The check implemented by that Skip function is already implemented in 'InfoMutableCommandBufferTest::Skip()'. Also, this function tries to get the extension_version before checking whether the extension is supported, leading to false negatives on devices that do not support the extension. 
--- .../mutable_command_info.cpp | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp index f75457e6..1657a6fa 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -116,26 +116,6 @@ struct PropertiesArray : public InfoMutableCommandBufferTest : InfoMutableCommandBufferTest(device, context, queue) {} - virtual bool Skip() override - { - Version device_version = get_device_cl_version(device); - if ((device_version >= Version(3, 0)) - || is_extension_available(device, "cl_khr_extended_versioning")) - { - - cl_version extension_version = get_extension_version( - device, "cl_khr_command_buffer_mutable_dispatch"); - - if (extension_version != CL_MAKE_VERSION(0, 9, 3)) - { - log_info("cl_khr_command_buffer_mutable_dispatch version 0.9.3 " - "is required to run the test, skipping.\n "); - return true; - } - } - return InfoMutableCommandBufferTest::Skip(); - } - cl_int Run() override { cl_command_properties_khr props[] = { From c0f5c4e27104c57eb56dd3fea731c499a05d091a Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 26 Aug 2025 17:27:33 +0100 Subject: [PATCH 45/61] Fix basic progvar_prog_scope_init and progvar_prog_scope_uninit (#2497) The test was mapping a memory object using CL_MAP_READ but then writing to its host_ptr using memset. 
--- test_conformance/basic/test_progvar.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 15b4df43..b555b33d 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -1264,7 +1264,7 @@ static int l_write_read_for_type(cl_device_id device, cl_context context, } cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer( - queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, + queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0, 0, 0); memset(read_data, -1, read_data_size); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); @@ -1503,7 +1503,7 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context, clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0); cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer( - queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, + queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0, 0, 0); memset(read_data, -1, read_data_size); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); From b0245f1a28764cf0438e34a0d5669dd405c698bc Mon Sep 17 00:00:00 2001 From: Jose Lopez Date: Tue, 26 Aug 2025 17:35:48 +0100 Subject: [PATCH 46/61] Rename Version major and minor getters (#2451) Both these functions cause a conflict when using an old version of the GNU C Library with the header file sys/sysmacros.h where major() and minor() are defined as a macro --- test_common/harness/testHarness.h | 4 ++-- test_conformance/images/common.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h index 32ed18b4..cc9d8212 100644 --- a/test_common/harness/testHarness.h +++ b/test_common/harness/testHarness.h @@ -28,8 +28,8 @@ public: Version(): m_major(0), m_minor(0) {} Version(cl_uint major, cl_uint minor): m_major(major), m_minor(minor) 
{} - int major() const { return m_major; } - int minor() const { return m_minor; } + int get_major() const { return m_major; } + int get_minor() const { return m_minor; } bool operator>(const Version &rhs) const { return to_uint() > rhs.to_uint(); diff --git a/test_conformance/images/common.cpp b/test_conformance/images/common.cpp index 95845b72..8120e3ab 100644 --- a/test_conformance/images/common.cpp +++ b/test_conformance/images/common.cpp @@ -248,7 +248,7 @@ clMemWrapper create_image(cl_context context, cl_command_queue queue, cl_mem_flags buffer_flags = CL_MEM_READ_WRITE; if (enable_pitch) { - if (version.major() == 1) + if (version.get_major() == 1) { host_ptr = malloc(imageInfo->rowPitch); } From e936977934ac450d8bb7c66635c02105a29f7ea8 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 27 Aug 2025 03:43:29 -0700 Subject: [PATCH 47/61] remove checks for the command-buffer pending state (#2504) This is a very small subset of the changes in #2477 to get things building again, since the command-buffer pending state is no longer in the spec or headers. 
--- .../command_buffer_get_command_buffer_info.cpp | 3 --- .../cl_khr_command_buffer/negative_command_buffer_enqueue.cpp | 2 -- .../cl_khr_command_buffer/negative_command_buffer_finalize.cpp | 2 -- 3 files changed, 7 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index 88ea906b..add0a531 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -250,9 +250,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest &trigger_event, &execute_event); test_error(error, "clEnqueueCommandBufferKHR failed"); - // verify pending state - error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR); - // execute command buffer cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE); diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp index bb59118d..ae14b87b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp @@ -124,8 +124,6 @@ struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState error = EnqueueCommandBuffer(); test_error(error, "EnqueueCommandBuffer failed"); - error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR); - test_error(error, "State is not Pending"); return CL_SUCCESS; } diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp index 8d00ca86..05f43506 100644 --- 
a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp @@ -89,8 +89,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest error = EnqueueCommandBuffer(); test_error(error, "EnqueueCommandBuffer failed"); - error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR); - test_error(error, "State is not Pending"); error = clFinalizeCommandBufferKHR(command_buffer); test_failure_error_ret(error, CL_INVALID_OPERATION, From d417d7670d0358fddcf67dfb46274cb56458132a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Sep 2025 13:29:05 +0100 Subject: [PATCH 48/61] Bump actions/checkout from 4 to 5 in the github-actions group (#2508) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps the github-actions group with 1 update: [actions/checkout](https://github.com/actions/checkout). Updates `actions/checkout` from 4 to 5
Release notes

Sourced from actions/checkout's releases.

v5.0.0

What's Changed

⚠️ Minimum Compatible Runner Version

v2.327.1
Release Notes

Make sure your runner is updated to this version or newer to use this release.

Full Changelog: https://github.com/actions/checkout/compare/v4...v5.0.0

v4.3.0

What's Changed

New Contributors

Full Changelog: https://github.com/actions/checkout/compare/v4...v4.3.0

v4.2.2

What's Changed

Full Changelog: https://github.com/actions/checkout/compare/v4.2.1...v4.2.2

v4.2.1

What's Changed

New Contributors

Full Changelog: https://github.com/actions/checkout/compare/v4.2.0...v4.2.1

... (truncated)

Changelog

Sourced from actions/checkout's changelog.

Changelog

V5.0.0

V4.3.0

v4.2.2

v4.2.1

v4.2.0

v4.1.7

v4.1.6

v4.1.5

v4.1.4

v4.1.3

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=4&new-version=5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore major version` will close this group update PR and stop Dependabot creating any more for the specific dependency's major version (unless you unignore this specific dependency's major version or upgrade to it yourself) - `@dependabot ignore minor version` will close this group update PR and stop Dependabot creating any more for the specific dependency's minor version (unless you unignore this specific dependency's minor version or upgrade to it yourself) - `@dependabot ignore ` will close this group update PR and stop Dependabot creating any more for the specific dependency (unless you unignore this specific dependency or upgrade to it yourself) - `@dependabot unignore ` will remove all of the ignore conditions of the specified dependency - `@dependabot unignore ` will remove the ignore condition of the specified dependency and ignore conditions
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/presubmit.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 43310277..3e05d0c8 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -28,7 +28,7 @@ jobs: arch: android-aarch64 android_arch_abi: arm64-v8a steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Setup Ninja uses: seanmiddleditch/gha-setup-ninja@master - name: Install Arm and AArch64 compilers @@ -184,7 +184,7 @@ jobs: steps: - name: Install packages run: sudo apt install -y clang-format clang-format-14 - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 - name: Check code format From fbba22770ddf84c5ac1e471e18134ed0bdd376a1 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 2 Sep 2025 17:38:56 +0200 Subject: [PATCH 49/61] Added support for cl_ext_float_atomics in CBasicTestFetchAdd with atomic_float (#2345) Related to #2142, according to the work plan, extending CBasicTestFetchAdd with support for atomic_float. 
--- test_conformance/c11_atomics/common.h | 11 +- test_conformance/c11_atomics/host_atomics.h | 20 +- test_conformance/c11_atomics/main.cpp | 6 + test_conformance/c11_atomics/test_atomics.cpp | 258 +++++++++++++++--- 4 files changed, 253 insertions(+), 42 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 3cab98ce..1fca36b8 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -74,9 +74,11 @@ extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device + extern cl_half_rounding_mode gHalfRoundingMode; extern bool gFloatAtomicsSupported; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; +extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps; extern const char * get_memory_order_type_name(TExplicitMemoryOrderType orderType); @@ -174,6 +176,13 @@ public: { return false; } + virtual bool + IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) + { + return expected != testValues[whichDestValue]; + } virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) { @@ -1449,7 +1458,7 @@ int CBasicTest::ExecuteSingleTest( startRefValues.size() ? 
&startRefValues[0] : 0, i)) break; // no expected value function provided - if (expected != destItems[i]) + if (IsTestNotAsExpected(expected, destItems, i)) { std::stringstream logLine; logLine << "ERROR: Result " << i diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index e5b1d328..3bc88a23 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -17,6 +17,7 @@ #define HOST_ATOMICS_H_ #include "harness/testHarness.h" +#include #ifdef WIN32 #include "Windows.h" @@ -94,14 +95,25 @@ template CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { + if constexpr (std::is_same_v) + { + static std::mutex mx; + std::lock_guard lock(mx); + CorrespondingType old_value = *a; + *a += c; + return old_value; + } + else + { #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - return InterlockedExchangeAdd(a, c); + return InterlockedExchangeAdd(a, c); #elif defined(__GNUC__) - return __sync_fetch_and_add(a, c); + return __sync_fetch_and_add(a, c); #else - log_info("Host function not implemented: atomic_fetch_add\n"); - return 0; + log_info("Host function not implemented: atomic_fetch_add\n"); + return 0; #endif + } } template diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 40972b26..f089d6da 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap, cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; bool gFloatAtomicsSupported = false; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; +cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0; test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); @@ -132,6 +133,11 @@ test_status InitCL(cl_device_id device) { if (is_extension_available(device, 
"cl_ext_float_atomics")) { gFloatAtomicsSupported = true; + + cl_int error = clGetDeviceInfo( + device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); + test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); if (is_extension_available(device, "cl_khr_fp16")) { cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index a08a0daf..a2be0549 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -16,10 +16,13 @@ #include "harness/testHarness.h" #include "harness/kernelHelpers.h" #include "harness/typeWrappers.h" +#include "harness/conversions.h" #include "common.h" #include "host_atomics.h" +#include +#include #include #include @@ -1163,61 +1166,233 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak) template class CBasicTestFetchAdd : public CBasicTestMemOrderScope { + + double min_range; + double max_range; + double max_error_fp32; + std::vector ref_vals; + public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) - {} - virtual std::string ProgramCore() + useSVM), + min_range(-999.0), max_range(999.0), max_error_fp32(0.0) + { + if constexpr (std::is_same_v) + { + StartValue(0.f); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.resize(threadCount); + + for (cl_uint i = 0; i < threadCount; i++) + ref_vals[i] = get_random_float(min_range, max_range, d); + + memcpy(startRefValues, ref_vals.data(), + 
sizeof(HostDataType) * ref_vals.size()); + + // Estimate highest possible summation error for given set. + std::vector sums; + std::sort(ref_vals.begin(), ref_vals.end()); + + sums.push_back( + std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f)); + + sums.push_back( + std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); + + std::sort( + ref_vals.begin(), ref_vals.end(), + [](float a, float b) { return std::abs(a) < std::abs(b); }); + + double precise = 0.0; + for (auto elem : ref_vals) precise += double(elem); + sums.push_back(precise); + + sums.push_back( + std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f)); + + sums.push_back( + std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); + + std::sort(sums.begin(), sums.end()); + max_error_fp32 = + std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back()); + + // restore unsorted order + memcpy(ref_vals.data(), startRefValues, + sizeof(HostDataType) * ref_vals.size()); + } + else + { + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + } + return true; + } + return false; + } + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return " oldValues[tid] = atomic_fetch_add" + postfix - + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() - + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" - + postfix + "(&destMemory[0], (" - + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope - + ");\n" - " atomic_fetch_add" - + postfix + "(&destMemory[0], (" - + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope - + ");\n" - " atomic_fetch_add" - + postfix + "(&destMemory[0], ((" - + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" - + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope - + ");\n"; + + if constexpr (std::is_same_v) + { + return " atomic_fetch_add" + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")oldValues[tid]" + + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_add" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_add" + postfix + + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], ((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8" + + memoryOrderScope + ");\n"; + } } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override { - oldValues[tid] = host_atomic_fetch_add( - &destMemory[0], 
(HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, - MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, - MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], - ((HostDataType)tid + 3) - << (sizeof(HostDataType) - 1) * 8, - MemoryOrder()); + if constexpr (std::is_same_v) + { + host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_add( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_add( + &destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add( + &destMemory[0], + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) - expected += ((HostDataType)i + 3) * 3 - + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + expected += startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + expected += ((HostDataType)i + 3) * 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + } + return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return std::abs((HOST_ATOMIC_FLOAT)expected + - testValues[whichDestValue]) + > max_error_fp32; + } + 
return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_add_generic(cl_device_id deviceID, @@ -1242,6 +1417,15 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchAdd test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchAdd From 8a2b91c715bd8631ce8577c3fd80823927278dee Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 2 Sep 2025 17:40:11 +0200 
Subject: [PATCH 50/61] Added test to verify negative result of clSetKernelArg with CL_INVALID_KERNEL (#2460) Related to #2282, according to work plan from [here](https://github.com/KhronosGroup/OpenCL-CTS/issues/2282#issuecomment-3069182773) --- test_conformance/api/test_kernels.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index 1446ade8..c5b46d94 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -726,6 +726,26 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) return TEST_PASS; } +REGISTER_TEST(negative_invalid_kernel) +{ + cl_int error = CL_SUCCESS; + clKernelWrapper kernel; + + clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_ONLY, + sizeof(cl_float), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Run the test - CL_INVALID_KERNEL + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem); + test_failure_error_ret( + error, CL_INVALID_KERNEL, + "clSetKernelArg is supposed to fail with CL_INVALID_KERNEL when kernel " + "is not a valid kernel object", + TEST_FAIL); + + return TEST_PASS; +} + REGISTER_TEST(negative_invalid_arg_index) { cl_int error = CL_SUCCESS; @@ -772,6 +792,7 @@ REGISTER_TEST(negative_invalid_arg_size_local) "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when 0 is " "passed to a local qualifier kernel argument", TEST_FAIL); + return TEST_PASS; } From d0b0bd9570753907d1cb3a22278f53b84631dbf1 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Tue, 2 Sep 2025 17:23:50 +0100 Subject: [PATCH 51/61] Fix -list option in test_spir (#2509) This handles the case where test_spir is called without arguments. 
--- test_conformance/spir/main.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 322e71bc..4e7c0ec5 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -6782,7 +6782,8 @@ static int ParseCommandLine (int argc, const char *argv[], ListTests(); return 0; } - else if (!strcmp(argv[1], "--list") || !strcmp(argv[1], "-list")) + else if ((argc > 1) + && (!strcmp(argv[1], "--list") || !strcmp(argv[1], "-list"))) { ListTests(); return 0; From 1562bcf03ce83ff553e390df4af2a707db19243d Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 2 Sep 2025 18:44:46 +0200 Subject: [PATCH 52/61] Fix OOB read in negative_set_immutable_memory_to_writeable_kernel_arg (#2481) The image was created with 4 channels, but the initialization data only accounted for a single channel. --- test_conformance/api/test_kernels.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index c5b46d94..1f81d9d9 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -690,7 +690,7 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) test_error(error, "Unable to get sample_image_test kernel for built program"); - std::vector mem_data(size_dim * size_dim); + std::vector mem_data(size_dim * size_dim * 4); buffer = clCreateBuffer(context, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, sizeof(cl_int) * size_dim, mem_data.data(), &error); test_error(error, "clCreateBuffer failed"); From 096a227afdff96c828fa19e3f62776d1ac620313 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 2 Sep 2025 17:45:54 +0100 Subject: [PATCH 53/61] Extra mutable dispatch coverage (#2499) Adds tests to cover points 2 & 3 from the questions asked about cl_khr_command_buffer_mutable_dispatch in https://github.com/KhronosGroup/OpenCL-Docs/issues/1437 * New test for 
point 2 from issue, `mutable_dispatch_updates_persist`, testing multiple enqueues of a command-buffer after update, and that the updated argument persists for all of them. * New test for point 3 pseudocode from issue in test `mutable_dispatch_set_kernel_arg` --- .../CMakeLists.txt | 1 + .../mutable_command_update_state.cpp | 280 ++++++++++++++++++ 2 files changed, 281 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_update_state.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index c848f733..aed183ff 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -15,6 +15,7 @@ set(${MODULE_NAME}_SOURCES mutable_command_iterative_arg_update.cpp mutable_command_work_groups.cpp mutable_command_work_dim.cpp + mutable_command_update_state.cpp ../basic_command_buffer.cpp ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_update_state.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_update_state.cpp new file mode 100644 index 00000000..b3c421b2 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_update_state.cpp @@ -0,0 +1,280 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "testHarness.h" +#include "mutable_command_basic.h" + +#include + +#include + +namespace { + +//////////////////////////////////////////////////////////////////////////////// +// Tests related to ensuring the state of the updated command-buffer is expected +// and the effects of operations on it don't have side effects on other objects. +// +// - Tests the updates applied to a command-buffer persist over all subsequent +// enqueues. +// - Tests interaction of `clSetKernelArg` with mutable-dispatch extension. + +struct MutableDispatchUpdateStateTest : public BasicMutableCommandBufferTest +{ + MutableDispatchUpdateStateTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue), + buffer(nullptr), command(nullptr) + {} + + bool Skip() override + { + if (BasicMutableCommandBufferTest::Skip()) return true; + + cl_mutable_dispatch_fields_khr mutable_capabilities; + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + return !mutable_support; + } + + cl_int SetUpKernelArgs() override + { + cl_int error = CL_SUCCESS; + buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), nullptr, &error); + test_error(error, "clCreateBuffer error"); + + // Zero initialize buffer + const cl_int zero_pattern = 0; + error = clEnqueueFillBuffer( + queue, buffer, &zero_pattern, 
sizeof(cl_int), 0, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer); + test_error(error, "Unable to set kernel argument 0"); + + return CL_SUCCESS; + } + + cl_int SetUpKernel() override + { + const char *add_kernel = + R"( + __kernel void add_kernel(__global int *data, int value) + { + size_t tid = get_global_id(0); + data[tid] += value; + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &add_kernel, "add_kernel"); + test_error(error, "Creating kernel failed"); + return CL_SUCCESS; + } + + bool verify_result(cl_int ref) + { + std::vector data(num_elements); + cl_int error = + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(), + data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (data[i] != ref) + { + log_error("Modified verification failed at index %zu: Got %d, " + "wanted %d\n", + i, data[i], ref); + return false; + } + } + return true; + } + + clMemWrapper buffer; + cl_mutable_command_khr command; +}; + +struct MutableDispatchUpdatesPersistTest : public MutableDispatchUpdateStateTest +{ + MutableDispatchUpdatesPersistTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : MutableDispatchUpdateStateTest(device, context, queue) + {} + + cl_int Run() override + { + const cl_int original_val = 42; + cl_int error = + clSetKernelArg(kernel, 1, sizeof(original_val), &original_val); + test_error(error, "Unable to set kernel argument 1"); + + cl_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, 
"clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Modify the command buffer before executing + const cl_int new_command_val = 5; + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val), + &new_command_val }; + cl_mutable_dispatch_config_khr dispatch_config{ + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void *configs[1] = { &dispatch_config }; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + const unsigned iterations = 5; + for (unsigned i = 0; i < iterations; i++) + { + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + } + + // Check the results execution sequence is the clEnqueueNDRangeKernel + // value + the updated command-buffer value, not using the original + // command value in the operation. + constexpr cl_int ref = iterations * new_command_val; + return verify_result(ref) ? 
TEST_PASS : TEST_FAIL; + } +}; + +struct MutableDispatchSetKernelArgTest : public MutableDispatchUpdateStateTest +{ + MutableDispatchSetKernelArgTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : MutableDispatchUpdateStateTest(device, context, queue) + {} + + cl_int Run() override + { + const cl_int original_val = 42; + cl_int error = + clSetKernelArg(kernel, 1, sizeof(original_val), &original_val); + test_error(error, "Unable to set kernel argument 1"); + + cl_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Set new kernel argument for later clEnqueueNDRangeKernel + const cl_int new_eager_val = 10; + error = + clSetKernelArg(kernel, 1, sizeof(new_eager_val), &new_eager_val); + test_error(error, "Unable to set kernel argument 1"); + + // Modify the command buffer before executing + const cl_int new_command_val = 5; + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val), + &new_command_val }; + cl_mutable_dispatch_config_khr dispatch_config{ + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void *configs[1] = { &dispatch_config }; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + 
config_types, configs); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // Eager kernel enqueue, followed by command-buffer enqueue + error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results execution sequence is the clEnqueueNDRangeKernel + // value + the updated command-buffer value, not using the original + // command value in the operation. + constexpr cl_int ref = new_eager_val + new_command_val; + return verify_result(ref) ? TEST_PASS : TEST_FAIL; + } +}; +} + +REGISTER_TEST(mutable_dispatch_updates_persist) +{ + return MakeAndRunTest( + device, context, queue, num_elements); +} + +REGISTER_TEST(mutable_dispatch_set_kernel_arg) +{ + return MakeAndRunTest(device, context, + queue, num_elements); +} From 8e125bd2e8f6606311af38ee479b28c7f1a2ea8f Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Tue, 2 Sep 2025 18:47:44 +0200 Subject: [PATCH 54/61] Refactor wimpy feature (#2507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Make it a common parameter in harness using either '-w', '--wimpy' or 'CL_WIMPY_MODE' environment variable. - Remove all test specific wimpy variable. 
--------- Co-authored-by: Kévin Petit --- test_common/harness/parseParameters.cpp | 10 +++++++ test_common/harness/parseParameters.h | 1 + test_common/harness/testHarness.cpp | 10 +++++++ .../conversions/basic_test_conversions.cpp | 2 +- .../conversions/basic_test_conversions.h | 1 - .../conversions/test_conversions.cpp | 12 --------- .../device_execution/enqueue_block.cpp | 2 +- .../device_execution/enqueue_flags.cpp | 2 +- .../device_execution/enqueue_multi_queue.cpp | 4 +-- .../device_execution/enqueue_ndrange.cpp | 2 +- .../device_execution/enqueue_wg_size.cpp | 2 +- .../device_execution/host_multi_queue.cpp | 2 +- .../device_execution/host_queue_order.cpp | 3 +-- test_conformance/device_execution/main.cpp | 6 ----- .../device_execution/nested_blocks.cpp | 2 +- test_conformance/half/Test_vStoreHalf.cpp | 1 + test_conformance/half/cl_utils.cpp | 1 - test_conformance/half/cl_utils.h | 1 - test_conformance/half/main.cpp | 10 ------- .../integer_ops/test_int_basic_ops.cpp | 9 ++++--- test_conformance/math_brute_force/main.cpp | 14 ---------- test_conformance/math_brute_force/utility.h | 2 +- test_conformance/select/test_select.cpp | 27 +++++++------------ .../test_thread_dimensions.cpp | 5 ++-- test_conformance/vectors/defines.h | 1 - test_conformance/vectors/globals.cpp | 2 -- test_conformance/vectors/structs.cpp | 4 ++- 27 files changed, 52 insertions(+), 86 deletions(-) diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp index 820814ec..65167116 100644 --- a/test_common/harness/parseParameters.cpp +++ b/test_common/harness/parseParameters.cpp @@ -36,6 +36,7 @@ bool gDisableSPIRVValidation = false; std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR; unsigned gNumWorkerThreads; bool gListTests = false; +bool gWimpyMode = false; void helpInfo() { @@ -52,6 +53,10 @@ void helpInfo() Select parallel execution with the specified number of worker threads. --list List sub-tests + -w, --wimpy + Enable wimpy mode. 
It does not impact all tests. Impacted tests will run + with a very small subset of the tests. This option should not be used + for conformance submission (default: disabled). For offline compilation (binary and spir-v modes) only: --compilation-cache-mode @@ -112,6 +117,11 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore) delArg++; gListTests = true; } + else if (!strcmp(argv[i], "--wimpy") || !strcmp(argv[i], "-w")) + { + delArg++; + gWimpyMode = true; + } else if (!strcmp(argv[i], "--compilation-mode")) { delArg++; diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h index 685aef21..ef8a7cb6 100644 --- a/test_common/harness/parseParameters.h +++ b/test_common/harness/parseParameters.h @@ -41,6 +41,7 @@ extern std::string gCompilationProgram; extern bool gDisableSPIRVValidation; extern std::string gSPIRVValidator; extern bool gListTests; +extern bool gWimpyMode; extern int parseCustomParam(int argc, const char *argv[], const char *ignore = 0); diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 71430cd0..c745a639 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -278,6 +278,16 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, return EXIT_SUCCESS; } + gWimpyMode |= (getenv("CL_WIMPY_MODE") != nullptr); + if (gWimpyMode) + { + log_info("\n"); + log_info("**************************\n"); + log_info("*** Wimpy mode enabled ***\n"); + log_info("**************************\n"); + log_info("\n"); + } + if ((argc > 1) && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) { char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME"); diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 4692c4b4..79333275 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ 
b/test_conformance/conversions/basic_test_conversions.cpp @@ -17,6 +17,7 @@ #include "harness/testHarness.h" #include "harness/compat.h" #include "harness/ThreadPool.h" +#include "harness/parseParameters.h" #if defined(__APPLE__) #include @@ -77,7 +78,6 @@ cl_mem gInBuffer; cl_mem gOutBuffers[kCallStyleCount]; size_t gComputeDevices = 0; uint32_t gDeviceFrequency = 0; -int gWimpyMode = 0; int gWimpyReductionFactor = 128; int gSkipTesting = 0; int gForceFTZ = 0; diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index 56232374..6846f780 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -80,7 +80,6 @@ extern int gHasDouble; extern int gTestDouble; extern int gHasHalfs; extern int gTestHalfs; -extern int gWimpyMode; extern int gWimpyReductionFactor; extern int gSkipTesting; extern int gMinVectorSize; diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 5df5f0fd..7a143a6b 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -247,7 +247,6 @@ static int ParseArgs(int argc, const char **argv) case 'h': gTestHalfs ^= 1; break; case 'l': gSkipTesting ^= 1; break; case 'm': gMultithread ^= 1; break; - case 'w': gWimpyMode ^= 1; break; case '[': parseWimpyReductionFactor(arg, gWimpyReductionFactor); break; @@ -316,14 +315,6 @@ static int ParseArgs(int argc, const char **argv) } } - // Check for the wimpy mode environment variable - if (getenv("CL_WIMPY_MODE")) - { - vlog("\n"); - vlog("*** Detected CL_WIMPY_MODE env ***\n"); - gWimpyMode = 1; - } - vlog("\n"); PrintArch(); @@ -364,9 +355,6 @@ static void PrintUsage(void) vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we " "just check to see that the kernels build. 
(Off by default.)\n"); vlog("\t\t-m\tToggle Multithreading. (On by default.)\n"); - vlog("\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very " - "small subset of the tests for each fn. NOT A VALID TEST! (Off by " - "default.)\n"); vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is " "1-12, default factor(%u)\n", gWimpyReductionFactor); diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp index c3761d08..10d7b86d 100644 --- a/test_conformance/device_execution/enqueue_block.cpp +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; // clang-format off static const char* enqueue_simple_block[] = { R"( diff --git a/test_conformance/device_execution/enqueue_flags.cpp b/test_conformance/device_execution/enqueue_flags.cpp index d880fadd..35610174 100644 --- a/test_conformance/device_execution/enqueue_flags.cpp +++ b/test_conformance/device_execution/enqueue_flags.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; #define BITS_DEPTH 28 static const char* enqueue_flags_wait_kernel_simple[] = diff --git a/test_conformance/device_execution/enqueue_multi_queue.cpp b/test_conformance/device_execution/enqueue_multi_queue.cpp index 90dbf2fd..5caaefa7 100644 --- a/test_conformance/device_execution/enqueue_multi_queue.cpp +++ b/test_conformance/device_execution/enqueue_multi_queue.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -24,10 +25,7 @@ #include - - #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static const char 
enqueue_block_multi_queue[] = NL "#define BLOCK_COMPLETED 0" NL "#define BLOCK_SUBMITTED 1" diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp index ffadc0ac..8f71ac4e 100644 --- a/test_conformance/device_execution/enqueue_ndrange.cpp +++ b/test_conformance/device_execution/enqueue_ndrange.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include #include @@ -26,7 +27,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static const char *helper_ndrange_1d_glo[] = { NL, "void block_fn(int len, __global atomic_uint* val)" NL, diff --git a/test_conformance/device_execution/enqueue_wg_size.cpp b/test_conformance/device_execution/enqueue_wg_size.cpp index f662edb9..5f40951e 100644 --- a/test_conformance/device_execution/enqueue_wg_size.cpp +++ b/test_conformance/device_execution/enqueue_wg_size.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static int nestingLevel = 3; static const char* enqueue_1D_wg_size_single[] = diff --git a/test_conformance/device_execution/host_multi_queue.cpp b/test_conformance/device_execution/host_multi_queue.cpp index cca83454..13ab87e6 100644 --- a/test_conformance/device_execution/host_multi_queue.cpp +++ b/test_conformance/device_execution/host_multi_queue.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static const char* multi_queue_simple_block1[] = { NL, "void block_fn(size_t tid, int mul, __global int* res)" diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp index bafbce08..d235780e 100644 
--- a/test_conformance/device_execution/host_queue_order.cpp +++ b/test_conformance/device_execution/host_queue_order.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include #include @@ -24,8 +25,6 @@ #include "utils.h" #include -extern int gWimpyMode; - #ifdef CL_VERSION_2_0 static const char* enqueue_block_first_kernel[] = diff --git a/test_conformance/device_execution/main.cpp b/test_conformance/device_execution/main.cpp index efb311f9..81c19802 100644 --- a/test_conformance/device_execution/main.cpp +++ b/test_conformance/device_execution/main.cpp @@ -25,7 +25,6 @@ #include "utils.h" std::string gKernelName; -int gWimpyMode = 0; test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); @@ -71,11 +70,6 @@ int main(int argc, const char *argv[]) gKernelName = std::string(argv[i + 1]); argsRemoveNum += 2; } - if (strcmp(argv[i], "-w") == 0 ){ - gWimpyMode = 1; - argsRemoveNum += 1; - } - if (argsRemoveNum > 0) { for (int j = i; j < (argc - argsRemoveNum); ++j) diff --git a/test_conformance/device_execution/nested_blocks.cpp b/test_conformance/device_execution/nested_blocks.cpp index a191bdf5..9fc2d741 100644 --- a/test_conformance/device_execution/nested_blocks.cpp +++ b/test_conformance/device_execution/nested_blocks.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -27,7 +28,6 @@ #ifdef CL_VERSION_2_0 static int gNestingLevel = 4; -extern int gWimpyMode; static const char* enqueue_nested_blocks_single[] = { diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index ab7fe257..15b940f5 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -16,6 +16,7 @@ #include "harness/compat.h" #include "harness/kernelHelpers.h" #include "harness/testHarness.h" 
+#include "harness/parseParameters.h" #include diff --git a/test_conformance/half/cl_utils.cpp b/test_conformance/half/cl_utils.cpp index 4fd42c02..7090f435 100644 --- a/test_conformance/half/cl_utils.cpp +++ b/test_conformance/half/cl_utils.cpp @@ -58,7 +58,6 @@ uint32_t gDeviceFrequency = 0; uint32_t gComputeDevices = 0; size_t gMaxThreadGroupSize = 0; size_t gWorkGroupSize = 0; -bool gWimpyMode = false; int gWimpyReductionFactor = 512; int gTestDouble = 0; bool gHostReset = false; diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h index da6073cf..d7754ebc 100644 --- a/test_conformance/half/cl_utils.h +++ b/test_conformance/half/cl_utils.h @@ -74,7 +74,6 @@ extern bool gHostReset; // gWimpyMode indicates if we run the test in wimpy mode where we limit the // size of 32 bit ranges to a much smaller set. This is meant to be used // as a smoke test -extern bool gWimpyMode; extern int gWimpyReductionFactor; uint64_t ReadTime( void ); diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp index 9d9211c7..0beec938 100644 --- a/test_conformance/half/main.cpp +++ b/test_conformance/half/main.cpp @@ -178,9 +178,6 @@ static int ParseArgs( int argc, const char **argv ) case 'r': gHostReset = true; break; - case 'w': // Wimpy mode - gWimpyMode = true; - break; case '[': parseWimpyReductionFactor( arg, gWimpyReductionFactor); break; @@ -199,12 +196,6 @@ static int ParseArgs( int argc, const char **argv ) } } - if (getenv("CL_WIMPY_MODE")) { - vlog( "\n" ); - vlog( "*** Detected CL_WIMPY_MODE env ***\n" ); - gWimpyMode = 1; - } - PrintArch(); if( gWimpyMode ) { @@ -234,7 +225,6 @@ static void PrintUsage( void ) "supported)\n"); vlog("\t\t-t\tToggle reporting performance data.\n"); vlog("\t\t-r\tReset buffers on host instead of on device.\n"); - vlog("\t\t-w\tRun in wimpy mode\n"); vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is " "1-12, default factor(%u)\n", gWimpyReductionFactor); diff --git 
a/test_conformance/integer_ops/test_int_basic_ops.cpp b/test_conformance/integer_ops/test_int_basic_ops.cpp index b9a47d75..aa48a6ba 100644 --- a/test_conformance/integer_ops/test_int_basic_ops.cpp +++ b/test_conformance/integer_ops/test_int_basic_ops.cpp @@ -22,6 +22,7 @@ #include "harness/conversions.h" #include "harness/ThreadPool.h" +#include "harness/parseParameters.h" #define NUM_TESTS 23 @@ -823,10 +824,10 @@ int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queu int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) { int errors = 0; - if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) { - log_info("Detected CL_WIMPY_MODE env\n"); - log_info("Skipping long test\n"); - return 0; + if (gWimpyMode && num == LONG_MATH_SHIFT_SIZE) + { + log_info("Running in wimpy mode, skipping long test\n"); + return 0; } int i; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 519d8b12..a395488c 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -66,7 +66,6 @@ int gSkipCorrectnessTesting = 0; static int gStopOnError = 0; static bool gSkipRestOfTests; int gForceFTZ = 0; -int gWimpyMode = 0; int gHostFill = 0; static int gHasDouble = 0; static int gTestFloat = 1; @@ -502,10 +501,6 @@ static int ParseArgs(int argc, const char **argv) case 'v': gVerboseBruteForce ^= 1; break; - case 'w': // wimpy mode - gWimpyMode ^= 1; - break; - case '[': parseWimpyReductionFactor(arg, gWimpyReductionFactor); break; @@ -585,14 +580,6 @@ static int ParseArgs(int argc, const char **argv) } } - // Check for the wimpy mode environment variable - if (getenv("CL_WIMPY_MODE")) - { - vlog("\n"); - vlog("*** Detected CL_WIMPY_MODE env ***\n"); - gWimpyMode = 1; - } - PrintArch(); if (gWimpyMode) @@ -647,7 +634,6 @@ static void PrintUsage(void) 
"accuracy checks.)\n"); vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n"); vlog("\t\t-s\tStop on error\n"); - vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n"); vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is " "1-10, default factor(%u)\n", gWimpyReductionFactor); diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index f5a30f86..f735f9d3 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -23,6 +23,7 @@ #include "harness/testHarness.h" #include "harness/ThreadPool.h" #include "harness/conversions.h" +#include "harness/parseParameters.h" #include "CL/cl_half.h" #define BUFFER_SIZE (1024 * 1024 * 2) @@ -60,7 +61,6 @@ extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]; extern int gSkipCorrectnessTesting; extern int gForceFTZ; extern int gFastRelaxedDerived; -extern int gWimpyMode; extern int gHostFill; extern int gIsInRTZMode; extern int gHasHalf; diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 490fa8bc..bec85e12 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -89,7 +89,6 @@ static void printUsage( void ); // test their entire range and 64 bits test will test the 32 bit // range. 
Otherwise, we test a subset of the range // [-min_short, min_short] -static bool s_wimpy_mode = false; static int s_wimpy_reduction_factor = 256; //----------------------------------------- @@ -141,8 +140,9 @@ static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, break; } case 4: { - if (!s_wimpy_mode) { - uint32_t* ui = (uint32_t *)cmp; + if (!gWimpyMode) + { + uint32_t *ui = (uint32_t *)cmp; for (size_t i = 0; i < count; ++i) ui[i] = (uint32_t)start++; } else { @@ -323,7 +323,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; const size_t block_elements = BUFFER_SIZE / type_size[stype]; - size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1; + size_t step = gWimpyMode ? s_wimpy_reduction_factor : 1; cl_ulong cmp_stride = block_elements * step; // It is more efficient to create the tests all at once since we @@ -474,7 +474,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c } // for vecsize } // for i - if (!s_wimpy_mode) + if (!gWimpyMode) log_info(" Passed\n\n"); else log_info(" Wimpy Passed\n\n"); @@ -603,13 +603,9 @@ int main(int argc, const char* argv[]) arg++; while(*arg != '\0') { - switch(*arg) { - case 'h': - printUsage(); - return 0; - case 'w': - s_wimpy_mode = true; - break; + switch (*arg) + { + case 'h': printUsage(); return 0; case '[': parseWimpyReductionFactor(arg, s_wimpy_reduction_factor); break; @@ -626,11 +622,7 @@ int main(int argc, const char* argv[]) } } - if (getenv("CL_WIMPY_MODE")) { - s_wimpy_mode = true; - } - - if (s_wimpy_mode && !gListTests) + if (gWimpyMode && !gListTests) { log_info("\n"); log_info("*** WARNING: Testing in Wimpy mode! 
***\n"); @@ -652,7 +644,6 @@ static void printUsage( void ) { log_info("test_select: [-w] \n"); log_info("\tdefault is to run the full test on the default device\n"); - log_info("\t-w run in wimpy mode (smoke test)\n"); log_info("\t-[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor); log_info("\n"); log_info("Test names:\n"); diff --git a/test_conformance/thread_dimensions/test_thread_dimensions.cpp b/test_conformance/thread_dimensions/test_thread_dimensions.cpp index ed163ac7..4dd8c7ae 100644 --- a/test_conformance/thread_dimensions/test_thread_dimensions.cpp +++ b/test_conformance/thread_dimensions/test_thread_dimensions.cpp @@ -19,6 +19,7 @@ #include "harness/errorHelpers.h" #include "harness/conversions.h" #include "harness/mt19937.h" +#include "harness/parseParameters.h" #include #include @@ -524,9 +525,9 @@ int test_thread_dimensions(cl_device_id device, cl_context context, cl_uint max_x_size = 1, min_x_size = 1, max_y_size = 1, min_y_size = 1, max_z_size = 1, min_z_size = 1; - if (getenv("CL_WIMPY_MODE") && !quick_test) + if (gWimpyMode && !quick_test) { - log_info("CL_WIMPY_MODE enabled, skipping test\n"); + log_info("Wimpy mode enabled, skipping test\n"); return 0; } diff --git a/test_conformance/vectors/defines.h b/test_conformance/vectors/defines.h index 0ea0b00d..7068cf18 100644 --- a/test_conformance/vectors/defines.h +++ b/test_conformance/vectors/defines.h @@ -26,7 +26,6 @@ extern int g_arrVecSizes[NUM_VECTOR_SIZES]; extern int g_arrVecSteps[NUM_VECTOR_SIZES]; -extern bool g_wimpyMode; extern const char *g_arrVecSizeNames[NUM_VECTOR_SIZES]; extern size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES]; diff --git a/test_conformance/vectors/globals.cpp b/test_conformance/vectors/globals.cpp index 6dee6d96..b7da9b6e 100644 --- a/test_conformance/vectors/globals.cpp +++ b/test_conformance/vectors/globals.cpp @@ -32,8 +32,6 @@ size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES] = { (size_t)0xf // 16 }; -bool 
g_wimpyMode = false; - ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes diff --git a/test_conformance/vectors/structs.cpp b/test_conformance/vectors/structs.cpp index 0098e414..c0757017 100644 --- a/test_conformance/vectors/structs.cpp +++ b/test_conformance/vectors/structs.cpp @@ -18,6 +18,8 @@ #include "defines.h" +#include "harness/parseParameters.h" + #define DEBUG_MEM_ALLOC 0 /** typedef struct _bufferStruct @@ -223,7 +225,7 @@ void initContents(bufferStruct *pBufferStruct, clState *pClState, break; } case 4: { - if (!g_wimpyMode) + if (!gWimpyMode) { uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn); for (i = 0; i < countIn; ++i) From 913e6e43880d3df3cbcc167dd46a0913f4bc7a22 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Tue, 2 Sep 2025 17:48:51 +0100 Subject: [PATCH 55/61] Fix test_api negative_set_read_write_image_arg. (#2510) When the implementation does not have image support, the functions used are not required to be defined and the test should be skipped. 
--- test_conformance/api/test_kernels.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index 1f81d9d9..d7b30462 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -807,6 +807,8 @@ REGISTER_TEST(negative_set_read_write_image_arg) constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 }; const int size_dim = 128; + PASSIVE_REQUIRE_IMAGE_SUPPORT(device); + // Setup the test error = create_single_kernel_helper(context, &program, nullptr, 2, test_kernels, nullptr); From 1aeca1360b55afa245ef505af6f745e817559434 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 9 Sep 2025 17:40:50 +0200 Subject: [PATCH 56/61] Added support for cl_ext_float_atomics in CBasicTestFetchSub with atomic_half (#2366) Related to #2142, according to the work plan, extending CBasicTestFetchSub with support for atomic_half. I wasn't able to test that PR entirely due to missing CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT/CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT capabilities for atomic_half. I appreciate reviewers' attention, thanks. 
--- test_conformance/c11_atomics/host_atomics.h | 26 +- test_conformance/c11_atomics/test_atomics.cpp | 246 ++++++++++++++++-- 2 files changed, 245 insertions(+), 27 deletions(-) diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 3bc88a23..e1787849 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -19,6 +19,8 @@ #include "harness/testHarness.h" #include +#include "CL/cl_half.h" + #ifdef WIN32 #include "Windows.h" #endif @@ -88,6 +90,8 @@ enum TExplicitMemoryOrderType #define HOST_FLAG cl_int +extern cl_half_rounding_mode gHalfRoundingMode; + // host atomic functions void host_atomic_thread_fence(TExplicitMemoryOrderType order); @@ -120,14 +124,26 @@ template CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { -#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - return InterlockedExchangeSubtract(a, c); + if constexpr (std::is_same_v) + { + static std::mutex mx; + std::lock_guard lock(mx); + CorrespondingType old_value = *a; + *a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)), + gHalfRoundingMode); + return old_value; + } + else + { +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + return InterlockedExchangeSubtract(a, c); #elif defined(__GNUC__) - return __sync_fetch_and_sub(a, c); + return __sync_fetch_and_sub(a, c); #else - log_info("Host function not implemented: atomic_fetch_sub\n"); - return 0; + log_info("Host function not implemented: atomic_fetch_sub\n"); + return 0; #endif + } } template diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index a2be0549..73599dc1 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1488,46 +1488,239 @@ REGISTER_TEST(svm_atomic_fetch_add) template class 
CBasicTestFetchSub : public CBasicTestMemOrderScope { + + double min_range; + double max_range; + double max_error; + std::vector ref_vals; + public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) - {} - virtual std::string ProgramCore() + useSVM), + min_range(-999.0), max_range(999.0), max_error(0.0) + { + if constexpr (std::is_same_v) + { + StartValue(0); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + template + float subtract_halfs(Iterator begin, Iterator end) + { + cl_half res = 0; + for (auto it = begin; it != end; ++it) + { + res = cl_half_from_float(cl_half_to_float(res) + - cl_half_to_float(*it), + gHalfRoundingMode); + } + return cl_half_to_float(res); + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.resize(threadCount); + + for (cl_uint i = 0; i < threadCount; i++) + ref_vals[i] = cl_half_from_float( + get_random_float(min_range, max_range, d), + gHalfRoundingMode); + + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * ref_vals.size()); + + // Estimate highest possible summation error for given set. 
+ std::vector sums; + std::sort(ref_vals.begin(), ref_vals.end(), + [](cl_half a, cl_half b) { + return cl_half_to_float(a) < cl_half_to_float(b); + }); + + sums.push_back( + subtract_halfs(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(ref_vals.begin(), ref_vals.end(), + [](cl_half a, cl_half b) { + return std::abs(cl_half_to_float(a)) + < std::abs(cl_half_to_float(b)); + }); + + float precise = 0.f; + for (auto elem : ref_vals) precise -= cl_half_to_float(elem); + sums.push_back(precise); + + sums.push_back( + subtract_halfs(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(sums.begin(), sums.end()); + max_error = std::abs(sums.front() - sums.back()); + + // restore unsorted order + memcpy(ref_vals.data(), startRefValues, + sizeof(HostDataType) * ref_vals.size()); + } + else + { + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + } + return true; + } + return false; + } + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return " oldValues[tid] = atomic_fetch_sub" + postfix - + "(&destMemory[0], tid + 3 +(((" - + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" - + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope - + ");\n"; + + if constexpr (std::is_same_v) + { + return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")oldValues[tid]" + + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[0], tid + 3 +(((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8)" + + memoryOrderScope + ");\n"; + } } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override { - oldValues[tid] = host_atomic_fetch_sub( - &destMemory[0], - (HostDataType)tid + 3 - + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), - MemoryOrder()); + if constexpr (std::is_same_v) + { + host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_sub( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = + host_atomic_fetch_sub(&destMemory[0], + (HostDataType)tid + 3 + + (((HostDataType)tid + 3) + << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for 
(cl_uint i = 0; i < threadCount; i++) - expected -= (HostDataType)i + 3 - + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + { + for (cl_uint i = 0; i < threadCount; i++) + { + expected = cl_half_from_float( + cl_half_to_float(expected) + - cl_half_to_float(startRefValues[i]), + gHalfRoundingMode); + } + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + expected -= (HostDataType)i + 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + return std::abs(cl_half_to_float(expected) + - cl_half_to_float(testValues[whichDestValue])) + > max_error; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint 
NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_sub_generic(cl_device_id deviceID, @@ -1552,6 +1745,15 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchSub test_half( + TYPE_ATOMIC_HALF, useSVM); + EXECUTE_TEST(error, + test_half.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchSub From a0bd81d574c8f6d70531c0c45c5223ee0447a02f Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 9 Sep 2025 17:44:35 +0200 Subject: [PATCH 57/61] Added test to verify negative result of clSetKernelArg with CL_INVALID_ARG_SIZE and memory object argument (#2450) Related to #2282, according to work plan from [here](https://github.com/KhronosGroup/OpenCL-CTS/issues/2282#issuecomment-3069182773) --- test_conformance/api/test_kernels.cpp | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index d7b30462..3c156d87 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -87,6 +87,14 @@ const char *sample_two_kernel_program[] = { "\n" "}\n" }; +const char *sample_mem_obj_size_test_kernel = R"( + __kernel void mem_obj_size_test(__global int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + } +)"; + const char *sample_local_size_test_kernel = R"( __kernel void local_size_test(__local int *src, __global int *dst) { @@ -726,6 +734,45 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) return TEST_PASS; } +REGISTER_TEST(negative_invalid_arg_mem_obj) +{ 
+ cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper mem_obj_arg_kernel; + + // Setup the test + error = + create_single_kernel_helper(context, &program, nullptr, 1, + &sample_mem_obj_size_test_kernel, nullptr); + test_error(error, "Unable to build test program"); + + mem_obj_arg_kernel = clCreateKernel(program, "mem_obj_size_test", &error); + test_error(error, + "Unable to get mem_obj_size_test kernel for built program"); + + std::vector mem_data(256, 0); + clMemWrapper buffer = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, mem_data.size(), mem_data.data(), &error); + test_error(error, "clCreateBuffer failed"); + + // Run the test - CL_INVALID_ARG_SIZE + error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) * 2, &buffer); + test_failure_error_ret( + error, CL_INVALID_ARG_SIZE, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when " + "argument is a memory object and arg_size > sizeof(cl_mem)", + TEST_FAIL); + + error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) / 2, &buffer); + test_failure_error_ret( + error, CL_INVALID_ARG_SIZE, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when " + "argument is a memory object and arg_size < sizeof(cl_mem)", + TEST_FAIL); + + return TEST_PASS; +} + REGISTER_TEST(negative_invalid_kernel) { cl_int error = CL_SUCCESS; From df61cad39fdd326ae938bc0a66698753358c9519 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 9 Sep 2025 17:45:35 +0200 Subject: [PATCH 58/61] Added support for cl_ext_float_atomics in CBasicTestFetchMin/Max with atomic_float (#2353) Related to #2142, according to the work plan, extending `CBasicTestFetchMin`/`CBasicTestFetchMax` with support for atomic_float. 
--- test_conformance/c11_atomics/common.h | 14 +- test_conformance/c11_atomics/host_atomics.h | 35 +- test_conformance/c11_atomics/main.cpp | 1 + test_conformance/c11_atomics/test_atomics.cpp | 362 +++++++++++++++--- 4 files changed, 348 insertions(+), 64 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 1fca36b8..d321819f 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -24,8 +24,9 @@ #include "CL/cl_half.h" -#include +#include #include +#include #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() @@ -892,14 +893,15 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) header += std::string("__global volatile ") + aTypeName + " destMemory[" + ss.str() + "] = {\n"; ss.str(""); - if (CBasicTest::DataType()._type - != TYPE_ATOMIC_HALF) - ss << _startValue; - else + == TYPE_ATOMIC_FLOAT) + ss << std::setprecision(10) << _startValue; + else if (CBasicTest::DataType()._type + == TYPE_ATOMIC_HALF) ss << static_cast( cl_half_to_float(static_cast(_startValue))); - + else + ss << _startValue; for (cl_uint i = 0; i < maxNumDestItems; i++) { if (aTypeName == "atomic_flag") diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index e1787849..d9482fb7 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -172,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp TExplicitMemoryOrderType order_success, TExplicitMemoryOrderType order_failure) { - CorrespondingType tmp; -#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - tmp = InterlockedCompareExchange(a, desired, *expected); + CorrespondingType tmp; + if constexpr (std::is_same_v) + { + static std::mutex mtx; + std::lock_guard lock(mtx); + tmp = *reinterpret_cast(a); + if (tmp == *expected) + { + 
*reinterpret_cast(a) = desired; + return true; + } + *expected = tmp; + } + else + { +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + + tmp = InterlockedCompareExchange(a, desired, *expected); #elif defined(__GNUC__) - tmp = __sync_val_compare_and_swap(a, *expected, desired); + tmp = __sync_val_compare_and_swap(a, *expected, desired); #else - log_info("Host function not implemented: atomic_compare_exchange\n"); - tmp = 0; + log_info("Host function not implemented: atomic_compare_exchange\n"); + tmp = 0; #endif - if(tmp == *expected) - return true; - *expected = tmp; - return false; + if (tmp == *expected) return true; + *expected = tmp; + } + return false; } template diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index f089d6da..485445f7 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -138,6 +138,7 @@ test_status InitCL(cl_device_id device) { device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + if (is_extension_available(device, "cl_khr_fp16")) { cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 73599dc1..b51f4461 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -2608,54 +2608,178 @@ REGISTER_TEST(svm_atomic_fetch_xor2) template class CBasicTestFetchMin : public CBasicTestMemOrderScope { + double min_range; + double max_range; + public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) + useSVM), 
+ min_range(-999.0), max_range(999.0) { StartValue(DataType().MaxValue()); + if constexpr (std::is_same_v) + { + CBasicTestMemOrderScope::OldValueCheck(false); + } } - virtual std::string ProgramCore() + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return " oldValues[tid] = atomic_fetch_min" + postfix - + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], - MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, - MTdata d) - { - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - startRefValues[i] = genrand_int32(d); - if (sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + return " atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + + ");\n"; + } + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + if constexpr (std::is_same_v) + { + host_atomic_fetch_min(&destMemory[0], oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_min( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_min( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr 
(std::is_same_v) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = get_random_float(min_range, max_range, d); + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + { + cl_ulong v = startRefValues[i]; + v |= (cl_ulong)genrand_int32(d) << 16; + startRefValues[i] = v; + } + } } return true; } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - if (startRefValues[i] < expected) expected = startRefValues[i]; + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + if (startRefValues[i] < expected) + expected = startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] < expected) expected = startRefValues[i]; + } } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return CBasicTestMemOrderScope:: + IsTestNotAsExpected(expected, testValues, whichDestValue); + return false; // ignore all but 0 which stores final result + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return
!correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_min_generic(cl_device_id deviceID, @@ -2680,6 +2804,15 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchMin test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMin @@ -2742,54 +2875,178 @@ REGISTER_TEST(svm_atomic_fetch_min) template class CBasicTestFetchMax : public CBasicTestMemOrderScope { + double min_range; + double max_range; + public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) + useSVM), + min_range(-999.0), max_range(999.0) { StartValue(DataType().MinValue()); + if constexpr 
(std::is_same_v) + { + CBasicTestMemOrderScope::OldValueCheck(false); + } } - virtual std::string ProgramCore() + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return " oldValues[tid] = atomic_fetch_max" + postfix - + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], - MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, - MTdata d) - { - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - startRefValues[i] = genrand_int32(d); - if (sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + return " atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + + ");\n"; + } + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + if constexpr (std::is_same_v) + { + host_atomic_fetch_max(&destMemory[0], oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_max( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_max( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = 
get_random_float(min_range, max_range, d); + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + { + cl_ulong v = startRefValues[i]; + v |= (cl_ulong)genrand_int32(d) << 16; + startRefValues[i] = v; + } + } } return true; } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - if (startRefValues[i] > expected) expected = startRefValues[i]; + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + if (startRefValues[i] > expected) + expected = startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] > expected) expected = startRefValues[i]; + } } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return CBasicTestMemOrderScope:: + IsTestNotAsExpected(expected, testValues, whichDestValue); + return false; // ignore all but 0 which stores final result + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + 
finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_max_generic(cl_device_id deviceID, @@ -2814,6 +3071,15 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchMax test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMax From 4fc861358ec5e66106195ca2daefa42dd38cdcd6 Mon Sep 17 00:00:00 2001 From: Starla Huang <82885378+starlahuang@users.noreply.github.com> Date: Tue, 9 Sep 2025 16:50:19 +0100 Subject: [PATCH 59/61] Resolve #852 issue (#1220) There are two changes in total. The first one to fix a small issue of current working directory so that second change can be applied. And the second one is for resolving #852 Removing hard-coded SPIR-V binaries in clUnloadPlatformCompiler tests. 
Fixes #852 --------- Signed-off-by: Ahmed Hesham Signed-off-by: Xing Huang --- test_conformance/compiler/CMakeLists.txt | 26 ++++- .../{ => spirv_asm}/write_kernel.spvasm32 | 0 .../{ => spirv_asm}/write_kernel.spvasm64 | 0 test_conformance/compiler/spirv_asm_list.txt | 4 + test_conformance/compiler/test_compile.cpp | 104 +++++------------- .../test_unload_platform_compiler.cpp | 59 +++++++--- ...est_unload_platform_compiler_resources.hpp | 44 -------- 7 files changed, 103 insertions(+), 134 deletions(-) rename test_conformance/compiler/{ => spirv_asm}/write_kernel.spvasm32 (100%) rename test_conformance/compiler/{ => spirv_asm}/write_kernel.spvasm64 (100%) create mode 100644 test_conformance/compiler/spirv_asm_list.txt diff --git a/test_conformance/compiler/CMakeLists.txt b/test_conformance/compiler/CMakeLists.txt index b64d3b31..498c6218 100644 --- a/test_conformance/compiler/CMakeLists.txt +++ b/test_conformance/compiler/CMakeLists.txt @@ -17,10 +17,31 @@ set(${MODULE_NAME}_SOURCES include(../CMakeCommon.txt) +# Include the relative paths to SPV assembly files +configure_file(spirv_asm_list.txt ${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt) +include(${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt) + +# Determine the corresponding binary outputs to the SPV assembly input files +set(COMPILER_ASM_REL_PATH spirv_asm) +set(COMPILER_ASM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${COMPILER_ASM_REL_PATH}") +set(COMPILER_SPV_PATH "${CMAKE_CURRENT_BINARY_DIR}/spirv_bin") + # Copy the required test include directories into the build directory. 
if(NOT DEFINED COMPILER_TEST_RESOURCES) set(COMPILER_TEST_RESOURCES $) endif() + +set(COMPILER_SPV_EXTRA "") +if(SPIRV_TOOLS_DIR AND IS_ABSOLUTE "${SPIRV_TOOLS_DIR}" AND + IS_DIRECTORY "${SPIRV_TOOLS_DIR}") + message("Using SPIR-V tools from '${SPIRV_TOOLS_DIR}'") + set(COMPILER_SPV_EXTRA "--assembler=${SPIRV_TOOLS_DIR}/spirv-as" "--validator=${SPIRV_TOOLS_DIR}/spirv-val") +endif() +set(COMPILER_ASSEMBLY_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../spirv_new/spirv_asm/assemble_spirv.py) + +include(CMakePrintHelpers) +cmake_print_variables(COMPILER_ASSEMBLY_SCRIPT) + add_custom_command( COMMENT "Copying compiler test resources..." TARGET ${${MODULE_NAME}_OUT} @@ -30,7 +51,10 @@ add_custom_command( ${COMPILER_TEST_RESOURCES}/includeTestDirectory COMMAND ${CMAKE_COMMAND} -E copy_directory ${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory - ${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory) + ${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory + COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose + DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM} + VERBATIM) include(GNUInstallDirs) diff --git a/test_conformance/compiler/write_kernel.spvasm32 b/test_conformance/compiler/spirv_asm/write_kernel.spvasm32 similarity index 100% rename from test_conformance/compiler/write_kernel.spvasm32 rename to test_conformance/compiler/spirv_asm/write_kernel.spvasm32 diff --git a/test_conformance/compiler/write_kernel.spvasm64 b/test_conformance/compiler/spirv_asm/write_kernel.spvasm64 similarity index 100% rename from test_conformance/compiler/write_kernel.spvasm64 rename to test_conformance/compiler/spirv_asm/write_kernel.spvasm64 diff --git a/test_conformance/compiler/spirv_asm_list.txt b/test_conformance/compiler/spirv_asm_list.txt new file mode 100644 index 00000000..8150ce28 --- /dev/null +++ b/test_conformance/compiler/spirv_asm_list.txt @@ -0,0 +1,4 @@ 
+set(COMPILER_SPIRV_NEW_ASM +compiler_spirv_asm/write_kernel.spvasm32 +compiler_spirv_asm/write_kernel.spvasm64 +) diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index 907de9cb..70ca9449 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -14,6 +14,9 @@ // limitations under the License. // #include "testBase.h" + +#include + #if defined(_WIN32) #include #elif defined(__linux__) || defined(__APPLE__) @@ -3020,15 +3023,6 @@ REGISTER_TEST(execute_after_embedded_header_link) return 0; } -#if defined(__APPLE__) || defined(__linux) -#define _mkdir(x) mkdir(x, S_IRWXU) -#define _chdir chdir -#define _rmdir rmdir -#define _unlink unlink -#else -#include -#endif - REGISTER_TEST(execute_after_included_header_link) { int error; @@ -3047,100 +3041,60 @@ REGISTER_TEST(execute_after_included_header_link) } /* setup */ -#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) - /* Some tests systems doesn't allow one to write in the test directory */ - if (_chdir("/tmp") != 0) + std::error_code ec; + auto temp_dir_path = std::filesystem::temp_directory_path(ec); + if (ec) { - log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", - __FILE__, __LINE__); + log_error("ERROR: Unable to get the temporary directory path\n"); return -1; } -#endif - if (_mkdir("foo") != 0) + temp_dir_path = temp_dir_path / "foo" / "bar"; + std::filesystem::create_directories(temp_dir_path, ec); + if (ec) { - log_error("ERROR: Unable to create directory foo! (in %s:%d)\n", - __FILE__, __LINE__); + log_error("ERROR: Unable to create directory: %s, error: %d (%s)\n", + temp_dir_path.u8string().c_str(), ec.value(), + ec.message().c_str()); return -1; } - if (_mkdir("foo/bar") != 0) - { - log_error("ERROR: Unable to create directory foo/bar! 
(in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_chdir("foo/bar") != 0) - { - log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - FILE *simple_header_file = fopen(simple_header_name, "w"); + + const auto simple_header_path = temp_dir_path / simple_header_name; + FILE *simple_header_file = + fopen(simple_header_path.u8string().c_str(), "w"); if (simple_header_file == NULL) { log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); + simple_header_path.u8string().c_str(), __FILE__, __LINE__); return -1; } if (fprintf(simple_header_file, "%s", simple_header) < 0) { log_error( "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); + simple_header_path.u8string().c_str(), __FILE__, __LINE__); return -1; } if (fclose(simple_header_file) != 0) { log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); + simple_header_path.u8string().c_str(), __FILE__, __LINE__); return -1; } - if (_chdir("../..") != 0) - { - log_error("ERROR: Unable to change to original working directory! (in " - "%s:%d)\n", - __FILE__, __LINE__); - return -1; - } -#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) - error = clCompileProgram(program, 1, &device, "-I/tmp/foo/bar", 0, NULL, + + const std::string include_path = + std::string("-I") + temp_dir_path.generic_u8string(); + error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL, NULL, NULL, NULL); -#else - error = clCompileProgram(program, 1, &device, "-Ifoo/bar", 0, NULL, NULL, - NULL, NULL); -#endif test_error(error, "Unable to compile a simple program with included header"); /* cleanup */ - if (_chdir("foo/bar") != 0) + std::filesystem::remove_all(temp_dir_path, ec); + if (ec) { - log_error("ERROR: Unable to change to directory foo/bar! 
(in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_unlink(simple_header_name) != 0) - { - log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); - return -1; - } - if (_chdir("../..") != 0) - { - log_error("ERROR: Unable to change to original working directory! (in " - "%s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_rmdir("foo/bar") != 0) - { - log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_rmdir("foo") != 0) - { - log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n", - __FILE__, __LINE__); + log_error("ERROR: Unable to delete directory: %s, error: %d (%s)", + temp_dir_path.u8string().c_str(), ec.value(), + ec.message().c_str()); return -1; } diff --git a/test_conformance/compiler/test_unload_platform_compiler.cpp b/test_conformance/compiler/test_unload_platform_compiler.cpp index 84442080..bb41f64d 100644 --- a/test_conformance/compiler/test_unload_platform_compiler.cpp +++ b/test_conformance/compiler/test_unload_platform_compiler.cpp @@ -25,6 +25,43 @@ #include #include #include +#include + +#if defined(_WIN32) +const std::string slash = "\\"; +#else +const std::string slash = "/"; +#endif +std::string compilerSpvBinaries = "test_conformance" + slash + "compiler" + + slash + "spirv_bin" + slash + "write_kernel.spv"; + +const std::string spvExt = ".spv"; + +std::vector readBinary(const char *file_name) +{ + using namespace std; + + ifstream file(file_name, ios::in | ios::binary | ios::ate); + + std::vector tmpBuffer(0); + + if (file.is_open()) + { + size_t size = file.tellg(); + tmpBuffer.resize(size); + file.seekg(0, ios::beg); + file.read(&tmpBuffer[0], size); + file.close(); + } + else + { + log_error("File %s not found\n", file_name); + } + + std::vector result(tmpBuffer.begin(), tmpBuffer.end()); + + return result; +} namespace { @@ -299,18 +336,12 @@ public: throw unload_test_failure("Failure 
getting device address bits"); } - switch (address_bits) - { - case 32: - m_spirv_binary = write_kernel_32_spv.data(); - m_spirv_size = write_kernel_32_spv.size(); - break; - case 64: - m_spirv_binary = write_kernel_64_spv.data(); - m_spirv_size = write_kernel_64_spv.size(); - break; - default: throw unload_test_failure("Invalid address bits"); - } + std::vector kernel_buffer; + + std::string file_name = + compilerSpvBinaries + std::to_string(address_bits); + m_spirv_binary = readBinary(file_name.c_str()); + m_spirv_size = m_spirv_binary.size(); } void create() final @@ -320,7 +351,7 @@ public: assert(nullptr == m_program); cl_int err = CL_INVALID_PLATFORM; - m_program = m_CreateProgramWithIL(m_context, m_spirv_binary, + m_program = m_CreateProgramWithIL(m_context, &m_spirv_binary[0], m_spirv_size, &err); if (CL_SUCCESS != err) throw unload_test_failure("clCreateProgramWithIL()", err); @@ -347,7 +378,7 @@ public: } private: - void *m_spirv_binary; + std::vector m_spirv_binary; size_t m_spirv_size; bool m_enabled; diff --git a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp b/test_conformance/compiler/test_unload_platform_compiler_resources.hpp index 82f87ffc..a529c212 100644 --- a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp +++ b/test_conformance/compiler/test_unload_platform_compiler_resources.hpp @@ -4,47 +4,3 @@ static const char write_kernel_source[] = R"( kernel void write_kernel(global unsigned int *p) { *p = 42; })"; - -/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */ -static std::array write_kernel_64_spv{ - { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x77, 0x72, 0x69, 0x74, 
0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, - 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 } -}; - -/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */ -static std::array write_kernel_32_spv{ - { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, - 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 
0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 } -}; From 3f6dd3c3336d877be50760a47a8fee823ece746d Mon Sep 17 00:00:00 2001 From: Yilong Guo Date: Tue, 9 Sep 2025 23:53:13 +0800 Subject: [PATCH 60/61] Fix buffer overflow in MutableDispatchWorkDim test (#2511) The test had critical buffer overflow issues: 1. Buffer size was calculated incorrectly: used update_elements (4) instead of total work items. For 3D kernels, this meant allocating 16 bytes when 64*4=256 bytes were needed for the updated 4x4x4 grid. 2. Original 2x2x2 grid writes 8 elements (32 bytes) but buffer was only 16 bytes, causing overflow on first execution. 3. Updated 4x4x4 grid writes 64 elements (256 bytes) with massive overflow into adjacent memory. 4. Verify function only checked one dimension instead of total elements in the 3D grid. 
Fixed by: - Calculating total work items as product of all dimensions - Using update_total_elements (64) for buffer allocation - Updating Verify calls to check correct number of elements - Adding constants for original_total_elements and update_total_elements --- .../mutable_command_work_dim.cpp | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp index 43df9fbe..b317884b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp @@ -36,7 +36,7 @@ struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest cl_int SetUp(int elements) override { - result_data.resize(update_elements); + result_data.resize(update_total_elements); return InfoMutableCommandBufferTest::SetUp(elements); } @@ -53,7 +53,7 @@ struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest return !mutable_support || InfoMutableCommandBufferTest::Skip(); } - bool Verify(cl_mem buffer, cl_uint gid_elements) + bool Verify(cl_mem buffer, cl_uint expected_value, size_t total_elements) { std::memset(result_data.data(), 0, alloc_size); cl_int error = @@ -61,13 +61,13 @@ struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest result_data.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); - for (size_t i = 0; i < gid_elements; i++) + for (size_t i = 0; i < total_elements; i++) { - if (result_data[i] != gid_elements) + if (result_data[i] != expected_value) { log_error("Data failed to verify at index %zu. 
" "Expected %u, result was %u\n", - i, gid_elements, result_data[i]); + i, expected_value, result_data[i]); return false; } } @@ -134,15 +134,15 @@ struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest test_error(error, "clFinish failed."); // Verify results before any update - if (!Verify(stream1, global_size_3D[0])) + if (!Verify(stream1, global_size_3D[0], original_total_elements)) { return TEST_FAIL; } - if (!Verify(stream2, global_size_3D[1])) + if (!Verify(stream2, global_size_3D[1], original_total_elements)) { return TEST_FAIL; } - if (!Verify(stream3, global_size_3D[2])) + if (!Verify(stream3, global_size_3D[2], original_total_elements)) { return TEST_FAIL; } @@ -178,15 +178,15 @@ struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest test_error(error, "clEnqueueCommandBufferKHR failed"); // Verify update is reflected in buffer output. - if (!Verify(stream1, update_global_size_3D[0])) + if (!Verify(stream1, update_global_size_3D[0], update_total_elements)) { return TEST_FAIL; } - if (!Verify(stream2, update_global_size_3D[1])) + if (!Verify(stream2, update_global_size_3D[1], update_total_elements)) { return TEST_FAIL; } - if (!Verify(stream3, update_global_size_3D[2])) + if (!Verify(stream3, update_global_size_3D[2], update_total_elements)) { return TEST_FAIL; } @@ -205,8 +205,13 @@ struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest static constexpr std::array update_global_size_3D = { update_elements, update_elements, update_elements }; - // Size in bytes of each of the 3 cl_mem buffers - static const size_t alloc_size = update_elements * sizeof(cl_uint); + // Total number of work items in original and updated grids + static const size_t original_total_elements = + original_elements * original_elements * original_elements; + static const size_t update_total_elements = + update_elements * update_elements * update_elements; + // Size in bytes of each of the 3 cl_mem buffers (using the larger size) + static const 
size_t alloc_size = update_total_elements * sizeof(cl_uint); cl_mutable_command_khr command = nullptr; std::vector result_data; From 87fcb5b45c318dcbf471a92f0919afdcb51f1b87 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 9 Sep 2025 17:54:19 +0200 Subject: [PATCH 61/61] Make subgroup tests run on devices without non uniform work groups (#2513) This allows implementors to support `cl_khr_subgroup_ballot` and `cl_khr_subgroup_non_uniform_vote` without requiring non uniform workgroup support. --- .../subgroups/test_subgroup_ballot.cpp | 19 ++++++++++++++++--- .../test_subgroup_non_uniform_vote.cpp | 19 ++++++++++++++++--- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 69976c50..04f90e17 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -896,8 +896,21 @@ REGISTER_TEST(subgroup_functions_ballot) return TEST_SKIPPED_ITSELF; } - constexpr size_t global_work_size = 170; - constexpr size_t local_work_size = 64; + int error = 0; + + // Non-uniform work-groups are an optional feature from 3.0 onward. + cl_bool device_supports_non_uniform_wg = CL_TRUE; + if (get_device_cl_version(device) >= Version(3, 0)) + { + error = clGetDeviceInfo( + device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), + &device_supports_non_uniform_wg, nullptr); + test_error(error, "clGetDeviceInfo failed"); + } + + const size_t global_work_size = device_supports_non_uniform_wg ? 
170 : 192; + const size_t local_work_size = 64; + WorkGroupParams test_params(global_work_size, local_work_size); test_params.save_kernel_source(sub_group_ballot_mask_source); test_params.save_kernel_source(sub_group_non_uniform_broadcast_source, @@ -907,7 +920,7 @@ REGISTER_TEST(subgroup_functions_ballot) RunTestForType rft(device, context, queue, num_elements, test_params); // non uniform broadcast functions - int error = run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); error |= run_non_uniform_broadcast_for_type(rft); error |= run_non_uniform_broadcast_for_type(rft); error |= run_non_uniform_broadcast_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 74e9144e..b37c1db9 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -261,15 +261,28 @@ REGISTER_TEST(subgroup_functions_non_uniform_vote) return TEST_SKIPPED_ITSELF; } - constexpr size_t global_work_size = 170; - constexpr size_t local_work_size = 64; + int error = 0; + + // Non-uniform work-groups are an optional feature from 3.0 onward. + cl_bool device_supports_non_uniform_wg = CL_TRUE; + if (get_device_cl_version(device) >= Version(3, 0)) + { + error = clGetDeviceInfo( + device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), + &device_supports_non_uniform_wg, nullptr); + test_error(error, "clGetDeviceInfo failed"); + } + + const size_t global_work_size = device_supports_non_uniform_wg ? 
170 : 192; + const size_t local_work_size = 64; + WorkGroupParams test_params(global_work_size, local_work_size, 3); test_params.save_kernel_source( sub_group_non_uniform_any_all_all_equal_source); test_params.save_kernel_source(sub_group_elect_source, "sub_group_elect"); RunTestForType rft(device, context, queue, num_elements, test_params); - int error = run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); error |= run_vote_all_equal_for_type(rft); error |= run_vote_all_equal_for_type(rft); error |= run_vote_all_equal_for_type(rft);