diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index 025d31df..dd53292b 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -23,13 +23,13 @@ MTdata gMTdata; test_definition test_list[] = { - ADD_TEST( sub_group_info ), - ADD_TEST( work_item_functions ), - ADD_TEST( work_group_functions ), - ADD_TEST( barrier_functions ), + ADD_TEST(sub_group_info), + ADD_TEST(work_item_functions), + ADD_TEST(work_group_functions), + ADD_TEST(barrier_functions), }; -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); static test_status checkSubGroupsExtension(cl_device_id device) { @@ -38,19 +38,24 @@ static test_status checkSubGroupsExtension(cl_device_id device) auto version = get_device_cl_version(device); auto expected_min_version = Version(2, 0); - if (version < expected_min_version) { - version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str()); + if (version < expected_min_version) + { + version_expected_info("Test", expected_min_version.to_string().c_str(), + version.to_string().c_str()); return TEST_SKIP; } bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); - if ((version == expected_min_version) && !hasExtension) { - log_info("Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + if ((version == expected_min_version) && !hasExtension) + { + log_info( + "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); return TEST_SKIP; } - if ((version > expected_min_version) && !hasExtension) { + if ((version > expected_min_version) && !hasExtension) + { log_error("'cl_khr_subgroups' is a required extension, failing.\n"); return TEST_FAIL; } @@ -58,7 +63,8 @@ static test_status checkSubGroupsExtension(cl_device_id device) return TEST_PASS; } -static test_status InitCL(cl_device_id device) { +static test_status InitCL(cl_device_id device) +{ auto version = get_device_cl_version(device); test_status ret = TEST_PASS; @@ -68,13 +74,15 @@ static test_status InitCL(cl_device_id device) { int error; error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, - sizeof(max_sub_groups), &max_sub_groups, NULL); - if (error != CL_SUCCESS) { + sizeof(max_sub_groups), &max_sub_groups, NULL); + if (error != CL_SUCCESS) + { print_error(error, "Unable to get max number of subgroups"); return TEST_FAIL; } - if (max_sub_groups == 0) { + if (max_sub_groups == 0) + { ret = TEST_SKIP; } } @@ -83,12 +91,11 @@ static test_status InitCL(cl_device_id device) { ret = checkSubGroupsExtension(device); } return ret; - } int main(int argc, const char *argv[]) { gMTdata = init_genrand(0); - return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, InitCL); + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); } - diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h index 1be2508c..f10ef8cf 100644 --- a/test_conformance/subgroups/procs.h +++ b/test_conformance/subgroups/procs.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -30,11 +30,16 @@ extern MTdata gMTdata; extern "C" { #endif -extern int test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -extern int test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -extern int test_work_group_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -extern int test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -extern int test_pipe_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sub_group_info(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_work_item_functions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_work_group_functions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_barrier_functions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_pipe_functions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); #ifdef __cplusplus } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 60e392d5..9a57e466 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -25,89 +25,212 @@ // Some template helpers template struct TypeName; -template <> struct TypeName { static const char * val() { return "half"; } }; -template <> struct TypeName { static const char * val() { return "uint"; } }; -template <> struct TypeName { static const char * val() { return "int"; } }; -template <> struct TypeName { static const char * val() { return "ulong"; } }; -template <> struct TypeName { static const char * val() { return "long"; } }; -template <> struct TypeName { static const char * val() { return "float"; } }; -template <> struct TypeName { static const char * val() { return "double"; } }; +template <> struct TypeName +{ + static const char *val() { return "half"; } +}; +template <> struct TypeName +{ + static const char *val() { return "uint"; } +}; +template <> struct TypeName +{ + static const char *val() { return "int"; } +}; +template <> struct TypeName +{ + static const char *val() { return "ulong"; } +}; +template <> struct TypeName +{ + static const char *val() { return "long"; } +}; +template <> struct TypeName +{ + static const char *val() { return "float"; } +}; +template <> struct TypeName +{ + static const char *val() { return "double"; } +}; template struct TypeDef; -template <> struct TypeDef { static const char * val() { return "typedef half Type;\n"; } }; -template <> struct TypeDef { static const char * val() { return "typedef uint Type;\n"; } }; -template <> struct TypeDef { static const char * val() { return "typedef int Type;\n"; } }; -template <> struct TypeDef { static const char * val() { return "typedef ulong Type;\n"; } }; -template <> struct TypeDef { static const char * val() { return "typedef long Type;\n"; } }; -template <> struct TypeDef { static const char * val() { return "typedef float Type;\n"; } }; -template <> struct TypeDef { static const char * val() { return "typedef double Type;\n"; } }; +template <> struct TypeDef +{ + static const char *val() { return "typedef half Type;\n"; } +}; +template <> struct TypeDef +{ + static const char *val() { return "typedef uint Type;\n"; } +}; +template <> struct TypeDef +{ + static const char *val() { return "typedef int Type;\n"; } +}; +template <> struct TypeDef +{ + static const char *val() { return "typedef ulong Type;\n"; } +}; +template <> struct TypeDef +{ + static const char *val() { return "typedef long Type;\n"; } +}; +template <> struct TypeDef +{ + static const char *val() { return "typedef float Type;\n"; } +}; +template <> struct TypeDef +{ + static const char *val() { return "typedef double Type;\n"; } +}; template struct TypeIdentity; -// template <> struct TypeIdentity { static cl_half val() { return (cl_half)0.0; } }; -// template <> struct TypeIdentity { static cl_half val() { return -(cl_half)65536.0; } }; -// template <> struct TypeIdentity { static cl_half val() { return (cl_half)65536.0; } }; +// template <> struct TypeIdentity { static cl_half val() { return +// (cl_half)0.0; } }; template <> struct TypeIdentity { static +// cl_half val() { return -(cl_half)65536.0; } }; template <> struct +// TypeIdentity { static cl_half val() { return (cl_half)65536.0; } +// }; -template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0; } }; -template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0; } }; -template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0xffffffff; } }; +template <> struct TypeIdentity +{ + static cl_uint val() { return (cl_uint)0; } +}; +template <> struct TypeIdentity +{ + static cl_uint val() { return (cl_uint)0; } +}; +template <> struct TypeIdentity +{ + static cl_uint val() { return (cl_uint)0xffffffff; } +}; -template <> struct TypeIdentity { static cl_int val() { return (cl_int)0 ; } }; -template <> struct TypeIdentity { static cl_int val() { return (cl_int)0x80000000; } }; -template <> struct TypeIdentity { static cl_int val() { return (cl_int)0x7fffffff; } }; +template <> struct TypeIdentity +{ + static cl_int val() { return (cl_int)0; } +}; +template <> struct TypeIdentity +{ + static cl_int val() { return (cl_int)0x80000000; } +}; +template <> struct TypeIdentity +{ + static cl_int val() { return (cl_int)0x7fffffff; } +}; -template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0 ; } }; -template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0 ; } }; -template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL ; } }; +template <> struct TypeIdentity +{ + static cl_ulong val() { return (cl_ulong)0; } +}; +template <> struct TypeIdentity +{ + static cl_ulong val() { return (cl_ulong)0; } +}; +template <> struct TypeIdentity +{ + static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; } +}; -template <> struct TypeIdentity { static cl_long val() { return (cl_long)0; } }; -template <> struct TypeIdentity { static cl_long val() { return (cl_long)0x8000000000000000ULL; } }; -template <> struct TypeIdentity { static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } }; +template <> struct TypeIdentity +{ + static cl_long val() { return (cl_long)0; } +}; +template <> struct TypeIdentity +{ + static cl_long val() { return (cl_long)0x8000000000000000ULL; } +}; +template <> struct TypeIdentity +{ + static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } +}; -template <> struct TypeIdentity { static float val() { return 0.F; } }; -template <> struct TypeIdentity { static float val() { return -std::numeric_limits::infinity(); } }; -template <> struct TypeIdentity { static float val() { return std::numeric_limits::infinity(); } }; +template <> struct TypeIdentity +{ + static float val() { return 0.F; } +}; +template <> struct TypeIdentity +{ + static float val() { return -std::numeric_limits::infinity(); } +}; +template <> struct TypeIdentity +{ + static float val() { return std::numeric_limits::infinity(); } +}; -template <> struct TypeIdentity { static double val() { return 0.L; } }; +template <> struct TypeIdentity +{ + static double val() { return 0.L; } +}; -template <> struct TypeIdentity { static double val() { return -std::numeric_limits::infinity(); } }; -template <> struct TypeIdentity { static double val() { return std::numeric_limits::infinity(); } }; +template <> struct TypeIdentity +{ + static double val() { return -std::numeric_limits::infinity(); } +}; +template <> struct TypeIdentity +{ + static double val() { return std::numeric_limits::infinity(); } +}; template struct TypeCheck; -template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; -template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; +template <> struct TypeCheck +{ + static bool val(cl_device_id) { return true; } +}; +template <> struct TypeCheck +{ + static bool val(cl_device_id) { return true; } +}; -static bool -int64_ok(cl_device_id device) +static bool int64_ok(cl_device_id device) { char profile[128]; int error; - error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - if (error) { + error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), + (void *)&profile, NULL); + if (error) + { log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); - return false; + return false; } if (strcmp(profile, "EMBEDDED_PROFILE") == 0) - return is_extension_available(device, "cles_khr_int64"); + return is_extension_available(device, "cles_khr_int64"); return true; } -template <> struct TypeCheck { static bool val(cl_device_id device) { return int64_ok(device); } }; -template <> struct TypeCheck { static bool val(cl_device_id device) { return int64_ok(device); } }; -template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; -template <> struct TypeCheck { - static bool val(cl_device_id device) { return is_extension_available(device, "cl_khr_fp16"); } +template <> struct TypeCheck +{ + static bool val(cl_device_id device) { return int64_ok(device); } }; -template <> struct TypeCheck { - static bool val(cl_device_id device) { +template <> struct TypeCheck +{ + static bool val(cl_device_id device) { return int64_ok(device); } +}; +template <> struct TypeCheck +{ + static bool val(cl_device_id) { return true; } +}; +template <> struct TypeCheck +{ + static bool val(cl_device_id device) + { + return is_extension_available(device, "cl_khr_fp16"); + } +}; +template <> struct TypeCheck +{ + static bool val(cl_device_id device) + { int error; cl_device_fp_config c; - error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), (void *)&c, NULL); - if (error) { - log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); + error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), + (void *)&c, NULL); + if (error) + { + log_info( + "clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); return false; } return c != 0; @@ -116,10 +239,10 @@ template <> struct TypeCheck { // Run a test kernel to compute the result of a built-in on an input -static int -run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t global, size_t local, - void *idata, size_t isize, void *mdata, size_t msize, - void *odata, size_t osize, size_t tsize=0) +static int run_kernel(cl_context context, cl_command_queue queue, + cl_kernel kernel, size_t global, size_t local, + void *idata, size_t isize, void *mdata, size_t msize, + void *odata, size_t osize, size_t tsize = 0) { clMemWrapper in; clMemWrapper xy; @@ -136,8 +259,10 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); test_error(error, "clCreateBuffer failed"); - if (tsize) { - tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, tsize, NULL, &error); + if (tsize) + { + tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, + tsize, NULL, &error); test_error(error, "clCreateBuffer failed"); } @@ -150,21 +275,26 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); test_error(error, "clSetKernelArg failed"); - if (tsize) { + if (tsize) + { error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp); test_error(error, "clSetKernelArg failed"); } - error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, NULL); + error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, + NULL); test_error(error, "clEnqueueWriteBuffer failed"); - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, + NULL, NULL); test_error(error, "clEnqueueNDRangeKernel failed"); - error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, + NULL); test_error(error, "clEnqueueReadBuffer failed"); - error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, + NULL); test_error(error, "clEnqueueReadBuffer failed"); error = clFinish(queue); @@ -174,10 +304,13 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t } // Driver for testing a single built in function -template -struct test { - static int - run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, const char *kname, const char *src, int dynscl=0) +template +struct test +{ + static int run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, const char *kname, + const char *src, int dynscl = 0) { size_t tmp; int error; @@ -189,24 +322,25 @@ struct test { clProgramWrapper program; clKernelWrapper kernel; cl_platform_id platform; - cl_int sgmap[2*GSIZE]; + cl_int sgmap[2 * GSIZE]; Ty mapin[LSIZE]; Ty mapout[LSIZE]; - // Make sure a test of type Ty is supported by the device - if (!TypeCheck::val(device)) - return 0; + // Make sure a test of type Ty is supported by the device + if (!TypeCheck::val(device)) return 0; - error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), + (void *)&platform, NULL); test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" - "#define XY(M,I) M[I].x = get_sub_group_local_id(); M[I].y = get_sub_group_id();\n"; + "#define XY(M,I) M[I].x = get_sub_group_local_id(); " + "M[I].y = get_sub_group_id();\n"; kstrings[1] = TypeDef::val(); kstrings[2] = src; - error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0"); - if (error != 0) - return error; + error = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0"); + if (error != 0) return error; // Determine some local dimensions to use for the test. global = GSIZE; @@ -215,31 +349,42 @@ struct test { // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple subgroups - if (local > LSIZE) - local = LSIZE; + if (local > LSIZE) local = LSIZE; - // Get the sub group info + // Get the sub group info clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; - clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, - "clGetKernelSubGroupInfoKHR"); - if (clGetKernelSubGroupInfoKHR_ptr == NULL) { - log_error("ERROR: clGetKernelSubGroupInfoKHR function not available"); + clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSubGroupInfoKHR"); + if (clGetKernelSubGroupInfoKHR_ptr == NULL) + { + log_error( + "ERROR: clGetKernelSubGroupInfoKHR function not available"); return -1; } - error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, - sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); - test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); + error = clGetKernelSubGroupInfoKHR_ptr( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); + test_error(error, + "clGetKernelSubGroupInfoKHR failed for " + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); subgroup_size = (int)tmp; - error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, - sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); - test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); + error = clGetKernelSubGroupInfoKHR_ptr( + kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); + test_error(error, + "clGetKernelSubGroupInfoKHR failed for " + "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); num_subgroups = (int)tmp; - // Make sure the number of sub groups is what we expect - if (num_subgroups != (local + subgroup_size - 1)/ subgroup_size) { - log_error("ERROR: unexpected number of subgroups (%d) returned by clGetKernelSubGroupInfoKHR\n", num_subgroups); + // Make sure the number of sub groups is what we expect + if (num_subgroups != (local + subgroup_size - 1) / subgroup_size) + { + log_error("ERROR: unexpected number of subgroups (%d) returned by " + "clGetKernelSubGroupInfoKHR\n", + num_subgroups); return -1; } @@ -248,38 +393,38 @@ struct test { size_t input_array_size = GSIZE; size_t output_array_size = GSIZE; - if (dynscl != 0) { - input_array_size = (int)global / (int)local * num_subgroups * dynscl; - output_array_size = (int)global / (int)local * dynscl; + if (dynscl != 0) + { + input_array_size = + (int)global / (int)local * num_subgroups * dynscl; + output_array_size = (int)global / (int)local * dynscl; } idata.resize(input_array_size); odata.resize(output_array_size); - // Run the kernel once on zeroes to get the map - memset(&idata[0], 0, input_array_size * sizeof(Ty)); - error = run_kernel(context, queue, kernel, global, local, - &idata[0], input_array_size * sizeof(Ty), - sgmap, global*sizeof(cl_int)*2, - &odata[0], output_array_size * sizeof(Ty), - TSIZE*sizeof(Ty)); - if (error) - return error; + // Run the kernel once on zeroes to get the map + memset(&idata[0], 0, input_array_size * sizeof(Ty)); + error = run_kernel(context, queue, kernel, global, local, &idata[0], + input_array_size * sizeof(Ty), sgmap, + global * sizeof(cl_int) * 2, &odata[0], + output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); + if (error) return error; - // Generate the desired input for the kernel - Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, (int)global / (int)local); + // Generate the desired input for the kernel + Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, + (int)global / (int)local); - error = run_kernel(context, queue, kernel, global, local, - &idata[0], input_array_size * sizeof(Ty), - sgmap, global*sizeof(cl_int)*2, - &odata[0], output_array_size * sizeof(Ty), - TSIZE*sizeof(Ty)); - if (error) - return error; + error = run_kernel(context, queue, kernel, global, local, &idata[0], + input_array_size * sizeof(Ty), sgmap, + global * sizeof(cl_int) * 2, &odata[0], + output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); + if (error) return error; - // Check the result - return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, subgroup_size, (int)local, (int)global / (int)local); + // Check the result + return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, + subgroup_size, (int)local, (int)global / (int)local); } }; diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index c3f20da0..b85f4d81 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,102 +18,113 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" -static const char * lbar_source = -"__kernel void test_lbar(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" __local int tmp[200];\n" -" int gid = get_global_id(0);\n" -" int nid = get_sub_group_size();\n" -" int lid = get_sub_group_local_id();\n" -" xy[gid].x = lid;\n" -" xy[gid].y = get_sub_group_id();\n" -" if (get_sub_group_id() == 0) {\n" -" tmp[lid] = in[gid];\n" -" sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n" -" out[gid] = tmp[nid-1-lid];\n" -" } else {\n" -" out[gid] = -in[gid];\n" -" }\n" -"}\n"; +static const char *lbar_source = + "__kernel void test_lbar(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " __local int tmp[200];\n" + " int gid = get_global_id(0);\n" + " int nid = get_sub_group_size();\n" + " int lid = get_sub_group_local_id();\n" + " xy[gid].x = lid;\n" + " xy[gid].y = get_sub_group_id();\n" + " if (get_sub_group_id() == 0) {\n" + " tmp[lid] = in[gid];\n" + " sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n" + " out[gid] = tmp[nid-1-lid];\n" + " } else {\n" + " out[gid] = -in[gid];\n" + " }\n" + "}\n"; -static const char * gbar_source = -"__kernel void test_gbar(const __global Type *in, __global int2 *xy, __global Type *out, __global Type *tmp)\n" -"{\n" -" int gid = get_global_id(0);\n" -" int nid = get_sub_group_size();\n" -" int lid = get_sub_group_local_id();\n" -" int tof = get_group_id(0)*get_max_sub_group_size();\n" -" xy[gid].x = lid;\n" -" xy[gid].y = get_sub_group_id();\n" -" if (get_sub_group_id() == 0) {\n" -" tmp[tof+lid] = in[gid];\n" -" sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" -" out[gid] = tmp[tof+nid-1-lid];\n" -" } else {\n" -" out[gid] = -in[gid];\n" -" }\n" -"}\n"; +static const char *gbar_source = + "__kernel void test_gbar(const __global Type *in, __global int2 *xy, " + "__global Type *out, __global Type *tmp)\n" + "{\n" + " int gid = get_global_id(0);\n" + " int nid = get_sub_group_size();\n" + " int lid = get_sub_group_local_id();\n" + " int tof = get_group_id(0)*get_max_sub_group_size();\n" + " xy[gid].x = lid;\n" + " xy[gid].y = get_sub_group_id();\n" + " if (get_sub_group_id() == 0) {\n" + " tmp[tof+lid] = in[gid];\n" + " sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" + " out[gid] = tmp[tof+nid-1-lid];\n" + " } else {\n" + " out[gid] = -in[gid];\n" + " }\n" + "}\n"; // barrier test functions -template -struct BAR { +template struct BAR +{ static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; - int nj = (nw + ns - 1)/ns; + int nj = (nw + ns - 1) / ns; int e; ii = 0; - for (k=0; k nw ? nw - ii : ns; - for (i=0;i nw ? nw - ii : ns; - for (i=0; i, G, L>::run(device, context, queue, num_elements, "test_lbar", lbar_source); - error = test, G, L, G>::run(device, context, queue, num_elements, "test_gbar", gbar_source); + error = test, G, L>::run( + device, context, queue, num_elements, "test_lbar", lbar_source); + error = test, G, L, G>::run( + device, context, queue, num_elements, "test_gbar", gbar_source); return error; } - diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp index 6055f658..79929295 100644 --- a/test_conformance/subgroups/test_queries.cpp +++ b/test_conformance/subgroups/test_queries.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -15,117 +15,160 @@ // #include "procs.h" -typedef struct { +typedef struct +{ cl_uint maxSubGroupSize; cl_uint numSubGroups; } result_data; -static const char * query_kernel_source = -"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" -"\n" -"typedef struct {\n" -" uint maxSubGroupSize;\n" -" uint numSubGroups;\n" -"} result_data;\n" -"\n" -"__kernel void query_kernel( __global result_data *outData )\n" -"{\n" -" int gid = get_global_id( 0 );\n" -" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" -" outData[gid].numSubGroups = get_num_sub_groups();\n" -"}"; +static const char *query_kernel_source = + "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" + "\n" + "typedef struct {\n" + " uint maxSubGroupSize;\n" + " uint numSubGroups;\n" + "} result_data;\n" + "\n" + "__kernel void query_kernel( __global result_data *outData )\n" + "{\n" + " int gid = get_global_id( 0 );\n" + " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" + " outData[gid].numSubGroups = get_num_sub_groups();\n" + "}"; -int -test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +int test_sub_group_info(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { static const size_t gsize0 = 80; int i, error; size_t realSize; size_t kernel_max_subgroup_size, kernel_subgroup_count; - size_t global[] = {gsize0,14,10}; - size_t local[] = {0,0,0}; + size_t global[] = { gsize0, 14, 10 }; + size_t local[] = { 0, 0, 0 }; result_data result[gsize0]; cl_uint max_dimensions; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL); - test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(max_dimensions), &max_dimensions, NULL); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); cl_platform_id platform; clProgramWrapper program; clKernelWrapper kernel; clMemWrapper out; - error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &query_kernel_source, "query_kernel", "-cl-std=CL2.0"); - if (error != 0) - return error; + error = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &query_kernel_source, "query_kernel", + "-cl-std=CL2.0"); + if (error != 0) return error; // Determine some local dimensions to use for the test. - if (max_dimensions == 1) { - error = get_max_common_work_group_size(context, kernel, global[0], &local[0]); + if (max_dimensions == 1) + { + error = get_max_common_work_group_size(context, kernel, global[0], + &local[0]); test_error(error, "get_max_common_work_group_size failed"); - } else if (max_dimensions == 2) { - error = get_max_common_2D_work_group_size(context, kernel, global, local); + } + else if (max_dimensions == 2) + { + error = + get_max_common_2D_work_group_size(context, kernel, global, local); test_error(error, "get_max_common_2D_work_group_size failed"); - } else { - error = get_max_common_3D_work_group_size(context, kernel, global, local); + } + else + { + error = + get_max_common_3D_work_group_size(context, kernel, global, local); test_error(error, "get_max_common_3D_work_group_size failed"); } - error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), + (void *)&platform, NULL); test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM"); clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; - clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clGetKernelSubGroupInfoKHR"); - if (clGetKernelSubGroupInfoKHR_ptr == NULL) { + clGetKernelSubGroupInfoKHR_ptr = + (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSubGroupInfoKHR"); + if (clGetKernelSubGroupInfoKHR_ptr == NULL) + { log_error("ERROR: clGetKernelSubGroupInfoKHR function not available"); return -1; } - error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, - sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize); - test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); - log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_max_subgroup_size); + error = clGetKernelSubGroupInfoKHR_ptr( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size), + (void *)&kernel_max_subgroup_size, &realSize); + test_error(error, + "clGetKernelSubGroupInfoKHR failed for " + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); + log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel " + "is %d.\n", + (int)kernel_max_subgroup_size); - if (realSize != sizeof(kernel_max_subgroup_size)) { - log_error( "ERROR: Returned size of max sub group size not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize ); + if (realSize != sizeof(kernel_max_subgroup_size)) + { + log_error("ERROR: Returned size of max sub group size not valid! " + "(Expected %d, got %d)\n", + (int)sizeof(kernel_max_subgroup_size), (int)realSize); return -1; } - error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, - sizeof(local), (void *)&local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize); - test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); - log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_subgroup_count); + error = clGetKernelSubGroupInfoKHR_ptr( + kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(kernel_subgroup_count), + (void *)&kernel_subgroup_count, &realSize); + test_error(error, + "clGetKernelSubGroupInfoKHR failed for " + "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); + log_info( + "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n", + (int)kernel_subgroup_count); - if (realSize != sizeof(kernel_subgroup_count)) { - log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize ); + if (realSize != sizeof(kernel_subgroup_count)) + { + log_error("ERROR: Returned size of sub group count not valid! " + "(Expected %d, got %d)\n", + (int)sizeof(kernel_subgroup_count), (int)realSize); return -1; } // Verify that the kernel gets the same max_subgroup_size and subgroup_count - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL, &error); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL, + &error); test_error(error, "clCreateBuffer failed"); error = clSetKernelArg(kernel, 0, sizeof(out), &out); test_error(error, "clSetKernelArg failed"); - error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global, local, 0, NULL, NULL); + error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global, + local, 0, NULL, NULL); test_error(error, "clEnqueueNDRangeKernel failed"); - error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), &result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), + &result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); error = clFinish(queue); test_error(error, "clFinish failed"); - for (i=0; i<(int)gsize0; ++i) { - if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size) { - log_error("ERROR: get_max_subgroup_size() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n", - result[i].maxSubGroupSize, (cl_uint)kernel_max_subgroup_size); + for (i = 0; i < (int)gsize0; ++i) + { + if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size) + { + log_error("ERROR: get_max_subgroup_size() doesn't match result " + "from clGetKernelSubGroupInfoKHR, %u vs %u\n", + result[i].maxSubGroupSize, + (cl_uint)kernel_max_subgroup_size); return -1; } - if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count) { - log_error("ERROR: get_num_sub_groups() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n", + if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count) + { + log_error("ERROR: get_num_sub_groups() doesn't match result from " + "clGetKernelSubGroupInfoKHR, %u vs %u\n", result[i].numSubGroups, (cl_uint)kernel_subgroup_count); return -1; } @@ -133,4 +176,3 @@ test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue qu return 0; } - diff --git a/test_conformance/subgroups/test_workgroup.cpp b/test_conformance/subgroups/test_workgroup.cpp index 2230a6f6..572220f7 100644 --- a/test_conformance/subgroups/test_workgroup.cpp +++ b/test_conformance/subgroups/test_workgroup.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,103 +18,113 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" -static const char * any_source = -"__kernel void test_any(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_any(in[gid]);\n" -"}\n"; +static const char *any_source = "__kernel void test_any(const __global Type " + "*in, __global int2 *xy, __global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_any(in[gid]);\n" + "}\n"; -static const char * all_source = -"__kernel void test_all(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_all(in[gid]);\n" -"}\n"; +static const char *all_source = "__kernel void test_all(const __global Type " + "*in, __global int2 *xy, __global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_all(in[gid]);\n" + "}\n"; -static const char * bcast_source = -"__kernel void test_bcast(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" Type x = in[gid];\n" -" size_t loid = (size_t)((int)x % 100);\n" -" out[gid] = sub_group_broadcast(x, loid);\n" -"}\n"; +static const char *bcast_source = + "__kernel void test_bcast(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " Type x = in[gid];\n" + " size_t loid = (size_t)((int)x % 100);\n" + " out[gid] = sub_group_broadcast(x, loid);\n" + "}\n"; -static const char * redadd_source = -"__kernel void test_redadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_reduce_add(in[gid]);\n" -"}\n"; +static const char *redadd_source = + "__kernel void test_redadd(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_reduce_add(in[gid]);\n" + "}\n"; -static const char * redmax_source = -"__kernel void test_redmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_reduce_max(in[gid]);\n" -"}\n"; +static const char *redmax_source = + "__kernel void test_redmax(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_reduce_max(in[gid]);\n" + "}\n"; -static const char * redmin_source = -"__kernel void test_redmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_reduce_min(in[gid]);\n" -"}\n"; +static const char *redmin_source = + "__kernel void test_redmin(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_reduce_min(in[gid]);\n" + "}\n"; -static const char * scinadd_source = -"__kernel void test_scinadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_scan_inclusive_add(in[gid]);\n" -"}\n"; +static const char *scinadd_source = + "__kernel void test_scinadd(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_scan_inclusive_add(in[gid]);\n" + "}\n"; -static const char * scinmax_source = -"__kernel void test_scinmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_scan_inclusive_max(in[gid]);\n" -"}\n"; +static const char *scinmax_source = + "__kernel void test_scinmax(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_scan_inclusive_max(in[gid]);\n" + "}\n"; -static const char * scinmin_source = -"__kernel void test_scinmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_scan_inclusive_min(in[gid]);\n" -"}\n"; +static const char *scinmin_source = + "__kernel void test_scinmin(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_scan_inclusive_min(in[gid]);\n" + "}\n"; -static const char * scexadd_source = -"__kernel void test_scexadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_scan_exclusive_add(in[gid]);\n" -"}\n"; +static const char *scexadd_source = + "__kernel void test_scexadd(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_scan_exclusive_add(in[gid]);\n" + "}\n"; -static const char * scexmax_source = -"__kernel void test_scexmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_scan_exclusive_max(in[gid]);\n" -"}\n"; +static const char *scexmax_source = + "__kernel void test_scexmax(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_scan_exclusive_max(in[gid]);\n" + "}\n"; -static const char * scexmin_source = -"__kernel void test_scexmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" -"{\n" -" int gid = get_global_id(0);\n" -" XY(xy,gid);\n" -" out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" -"}\n"; +static const char *scexmin_source = + "__kernel void test_scexmin(const __global Type *in, __global int2 *xy, " + "__global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" + "}\n"; // These need to stay in sync with the kernel source below #define NUM_LOC 49 @@ -129,166 +139,174 @@ static const char * scexmin_source = #define INST_WAIT 0x2 #define INST_COUNT 0x3 -static const char * ifp_source = -"#define NUM_LOC 49\n" -"#define INST_LOC_MASK 0x7f\n" -"#define INST_OP_SHIFT 0\n" -"#define INST_OP_MASK 0xf\n" -"#define INST_LOC_SHIFT 4\n" -"#define INST_VAL_SHIFT 12\n" -"#define INST_VAL_MASK 0x7ffff\n" -"#define INST_END 0x0\n" -"#define INST_STORE 0x1\n" -"#define INST_WAIT 0x2\n" -"#define INST_COUNT 0x3\n" -"\n" -"__kernel void\n" -"test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n" -"{\n" -" __local atomic_int loc[NUM_LOC];\n" -"\n" -" // Don't run if there is only one sub group\n" -" if (get_num_sub_groups() == 1)\n" -" return;\n" -"\n" -" // First initialize loc[]\n" -" int lid = (int)get_local_id(0);\n" -"\n" -" if (lid < NUM_LOC)\n" -" atomic_init(loc+lid, 0);\n" -"\n" -" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" -"\n" -" // Compute pointer to this sub group's \"instructions\"\n" -" const __global int *pc = in +\n" -" ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n" -" (int)get_sub_group_id()) *\n" -" (NUM_LOC+1);\n" -"\n" -" // Set up to \"run\"\n" -" bool ok = (int)get_sub_group_local_id() == 0;\n" -" bool run = true;\n" -"\n" -" while (run) {\n" -" int inst = *pc++;\n" -" int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n" -" int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n" -" int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n" -"\n" -" switch (iop) {\n" -" case INST_STORE:\n" -" if (ok)\n" -" atomic_store(loc+iloc, ival);\n" -" break;\n" -" case INST_WAIT:\n" -" if (ok) {\n" -" while (atomic_load(loc+iloc) != ival)\n" -" ;\n" -" }\n" -" break;\n" -" case INST_COUNT:\n" -" if (ok) {\n" -" int i;\n" -" for (i=0;i> INST_OP_SHIFT) & INST_OP_MASK;\n" + " int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n" + " int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n" + "\n" + " switch (iop) {\n" + " case INST_STORE:\n" + " if (ok)\n" + " atomic_store(loc+iloc, ival);\n" + " break;\n" + " case INST_WAIT:\n" + " if (ok) {\n" + " while (atomic_load(loc+iloc) != ival)\n" + " ;\n" + " }\n" + " break;\n" + " case INST_COUNT:\n" + " if (ok) {\n" + " int i;\n" + " for (i=0;i -struct AA { +template struct AA +{ static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; - int nj = (nw + ns - 1)/ns; + int nj = (nw + ns - 1) / ns; int e; ii = 0; - for (k=0; k nw ? nw - ii : ns; e = (int)(genrand_int32(gMTdata) % 3); // Initialize data matrix indexed by local id and sub group id - switch (e) { - case 0: - memset(&t[ii], 0, n*sizeof(cl_int)); - break; - case 1: - memset(&t[ii], 0, n*sizeof(cl_int)); - i = (int)(genrand_int32(gMTdata) % (cl_uint)n); - t[ii + i] = 41; - break; - case 2: - memset(&t[ii], 0xff, n*sizeof(cl_int)); - break; + switch (e) + { + case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break; + case 1: + memset(&t[ii], 0, n * sizeof(cl_int)); + i = (int)(genrand_int32(gMTdata) % (cl_uint)n); + t[ii + i] = 41; + break; + case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break; } } // Now map into work group using map from device - for (j=0;j nw ? nw - ii : ns; // Compute target - if (Which == 0) { + if (Which == 0) + { taa = 0; - for (i=0; i -struct RED { +template struct RED +{ static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; - int nj = (nw + ns - 1)/ns; + int nj = (nw + ns - 1) / ns; ii = 0; - for (k=0; k nw ? nw - ii : ns; - for (i=0; i::val()); + log_info(" sub_group_reduce_%s(%s)...\n", + Which == 0 ? "add" : (Which == 1 ? "max" : "min"), + TypeName::val()); - for (k=0; k nw ? nw - ii : ns; // Compute target - if (Which == 0) { + if (Which == 0) + { // add tr = mx[ii]; - for (i=1; i mx[ii + i] ? tr : mx[ii + i]; - } else if (Which == 2) { + } + else if (Which == 2) + { // min tr = mx[ii]; - for (i=1; i mx[ii + i] ? mx[ii + i] : tr; } // Check result - for (i=0; i::val(), i, j, k); + for (i = 0; i < n; ++i) + { + rr = my[ii + i]; + if (rr != tr) + { + log_error("ERROR: sub_group_reduce_%s(%s) mismatch for " + "local id %d in sub group %d in group %d\n", + Which == 0 ? "add" + : (Which == 1 ? "max" : "min"), + TypeName::val(), i, j, k); return -1; } } @@ -383,7 +420,7 @@ struct RED { x += nw; y += nw; - m += 2*nw; + m += 2 * nw; } return 0; @@ -391,69 +428,91 @@ struct RED { }; // Scan Inclusive functions -template -struct SCIN { +template struct SCIN +{ static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; - int nj = (nw + ns - 1)/ns; + int nj = (nw + ns - 1) / ns; ii = 0; - for (k=0; k nw ? nw - ii : ns; - for (i=0; i::val()); + log_info(" sub_group_scan_inclusive_%s(%s)...\n", + Which == 0 ? "add" : (Which == 1 ? "max" : "min"), + TypeName::val()); - for (k=0; k nw ? nw - ii : ns; // Check result - for (i=0; i mx[ii + i] ? tr : mx[ii + i]); - } else { - tr = i == 0 ? mx[ii] : (tr > mx[ii + i] ? mx[ii + i] : tr); + } + else if (Which == 1) + { + tr = i == 0 ? mx[ii] + : (tr > mx[ii + i] ? tr : mx[ii + i]); + } + else + { + tr = i == 0 ? mx[ii] + : (tr > mx[ii + i] ? mx[ii + i] : tr); } - rr = my[ii+i]; - if (rr != tr) { - log_error("ERROR: sub_group_scan_inclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n", - Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName::val(), i, j, k); + rr = my[ii + i]; + if (rr != tr) + { + log_error( + "ERROR: sub_group_scan_inclusive_%s(%s) mismatch " + "for local id %d in sub group %d in group %d\n", + Which == 0 ? "add" : (Which == 1 ? "max" : "min"), + TypeName::val(), i, j, k); return -1; } } @@ -461,7 +520,7 @@ struct SCIN { x += nw; y += nw; - m += 2*nw; + m += 2 * nw; } return 0; @@ -469,69 +528,91 @@ struct SCIN { }; // Scan Exclusive functions -template -struct SCEX { +template struct SCEX +{ static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; - int nj = (nw + ns - 1)/ns; + int nj = (nw + ns - 1) / ns; ii = 0; - for (k=0; k nw ? nw - ii : ns; - for (i=0; i::val()); + log_info(" sub_group_scan_exclusive_%s(%s)...\n", + Which == 0 ? "add" : (Which == 1 ? "max" : "min"), + TypeName::val()); - for (k=0; k nw ? nw - ii : ns; // Check result - for (i=0; i::val() : tr + trt; - } else if (Which == 1) { - tr = i == 0 ? TypeIdentity::val() : (trt > tr ? trt : tr); - } else { - tr = i == 0 ? TypeIdentity::val() : (trt > tr ? tr : trt); + for (i = 0; i < n; ++i) + { + if (Which == 0) + { + tr = i == 0 ? TypeIdentity::val() : tr + trt; } - trt = mx[ii+i]; - rr = my[ii+i]; + else if (Which == 1) + { + tr = i == 0 ? TypeIdentity::val() + : (trt > tr ? trt : tr); + } + else + { + tr = i == 0 ? TypeIdentity::val() + : (trt > tr ? tr : trt); + } + trt = mx[ii + i]; + rr = my[ii + i]; - if (rr != tr) { - log_error("ERROR: sub_group_scan_exclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n", - Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName::val(), i, j, k); + if (rr != tr) + { + log_error( + "ERROR: sub_group_scan_exclusive_%s(%s) mismatch " + "for local id %d in sub group %d in group %d\n", + Which == 0 ? "add" : (Which == 1 ? "max" : "min"), + TypeName::val(), i, j, k); return -1; } } @@ -539,7 +620,7 @@ struct SCEX { x += nw; y += nw; - m += 2*nw; + m += 2 * nw; } return 0; @@ -547,64 +628,77 @@ struct SCEX { }; // Broadcast functios -template -struct BC { +template struct BC +{ static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, l, n; - int nj = (nw + ns - 1)/ns; + int nj = (nw + ns - 1) / ns; int d = ns > 100 ? 100 : ns; ii = 0; - for (k=0; k nw ? nw - ii : ns; - l = (int)(genrand_int32(gMTdata) & 0x7fffffff) % (d > n ? n : d); + l = (int)(genrand_int32(gMTdata) & 0x7fffffff) + % (d > n ? n : d); - for (i=0; i::val()); - for (k=0; k nw ? nw - ii : ns; l = (int)mx[ii] % 100; - tr = mx[ii+l]; + tr = mx[ii + l]; // Check result - for (i=0; i::val(), i, j, k); + for (i = 0; i < n; ++i) + { + rr = my[ii + i]; + if (rr != tr) + { + log_error("ERROR: sub_group_broadcast(%s) mismatch for " + "local id %d in sub group %d in group %d\n", + TypeName::val(), i, j, k); return -1; } } @@ -612,7 +706,7 @@ struct BC { x += nw; y += nw; - m += 2*nw; + m += 2 * nw; } return 0; @@ -625,10 +719,10 @@ struct BC { // local_size must be > NUM_LOC // Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements -static inline int -inst(int op, int loc, int val) +static inline int inst(int op, int loc, int val) { - return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT) | (op << INST_OP_SHIFT); + return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT) + | (op << INST_OP_SHIFT); } void gen_insts(cl_int *x, cl_int *p, int n) @@ -639,36 +733,37 @@ void gen_insts(cl_int *x, cl_int *p, int n) // Create a random permutation of 0...NUM_LOC-1 ii[0] = 0; - for (i=1; i> INST_OP_SHIFT) & INST_OP_MASK; cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK; cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK; scont = false; - switch (iop) { - case INST_STORE: - loc[iloc] = ival; - ++p[i]; - scont = true; - break; - case INST_WAIT: - if (loc[iloc] == ival) { + switch (iop) + { + case INST_STORE: + loc[iloc] = ival; ++p[i]; scont = true; - } - break; - case INST_COUNT: - loc[iloc] += ival; - ++p[i]; - scont = true; - break; - case INST_END: - ++nend; - break; + break; + case INST_WAIT: + if (loc[iloc] == ival) + { + ++p[i]; + scont = true; + } + break; + case INST_COUNT: + loc[iloc] += ival; + ++p[i]; + scont = true; + break; + case INST_END: ++nend; break; } } while (scont); } @@ -724,42 +823,48 @@ void run_insts(cl_int *x, cl_int *p, int n) } -struct IFP { +struct IFP +{ static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng) { int k; int nj = (nw + ns - 1) / ns; // We need at least 2 sub groups per group for this test - if (nj == 1) - return; + if (nj == 1) return; - for (k=0; k, G, L>::run(device, context, queue, num_elements, "test_any", any_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_all", all_source); + error = test, G, L>::run(device, context, queue, num_elements, + "test_any", any_source); + error |= test, G, L>::run(device, context, queue, num_elements, + "test_all", all_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + // error |= test, G, L>::run(device, context, queue, + // num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_bcast", bcast_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + // error |= test, G, L>::run(device, context, queue, + // num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redadd", redadd_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + // error |= test, G, L>::run(device, context, queue, + // num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmax", redmax_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); + // error |= test, G, L>::run(device, context, queue, + // num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_redmin", redmin_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + // error |= test, G, L>::run(device, context, + // queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinadd", scinadd_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + // error |= test, G, L>::run(device, context, + // queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmax", scinmax_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); + // error |= test, G, L>::run(device, context, + // queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scinmin", scinmin_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + // error |= test, G, L>::run(device, context, + // queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexadd", scexadd_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + // error |= test, G, L>::run(device, context, + // queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmax", scexmax_source); - // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); - error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); + // error |= test, G, L>::run(device, context, + // queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run( + device, context, queue, num_elements, "test_scexmin", scexmin_source); - error |= test::run(device, context, queue, num_elements, "test_ifp", ifp_source, NUM_LOC + 1); + error |= test::run(device, context, queue, num_elements, + "test_ifp", ifp_source, NUM_LOC + 1); return error; } - diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index 7bd81cdc..125ad9e9 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,140 +17,186 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" -struct get_test_data { +struct get_test_data +{ cl_uint subGroupSize; cl_uint maxSubGroupSize; cl_uint numSubGroups; cl_uint enqNumSubGroups; cl_uint subGroupId; cl_uint subGroupLocalId; - bool operator==(get_test_data x) { - return subGroupSize == x.subGroupSize && - maxSubGroupSize == x.maxSubGroupSize && - numSubGroups == x.numSubGroups && - subGroupId == x.subGroupId && - subGroupLocalId == x.subGroupLocalId; + bool operator==(get_test_data x) + { + return subGroupSize == x.subGroupSize + && maxSubGroupSize == x.maxSubGroupSize + && numSubGroups == x.numSubGroups && subGroupId == x.subGroupId + && subGroupLocalId == x.subGroupLocalId; } }; -static const char * get_test_source = -"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" -"\n" -"typedef struct {\n" -" uint subGroupSize;\n" -" uint maxSubGroupSize;\n" -" uint numSubGroups;\n" -" uint enqNumSubGroups;\n" -" uint subGroupId;\n" -" uint subGroupLocalId;\n" -"} get_test_data;\n" -"\n" -"__kernel void get_test( __global get_test_data *outData )\n" -"{\n" -" int gid = get_global_id( 0 );\n" -" outData[gid].subGroupSize = get_sub_group_size();\n" -" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" -" outData[gid].numSubGroups = get_num_sub_groups();\n" -" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n" -" outData[gid].subGroupId = get_sub_group_id();\n" -" outData[gid].subGroupLocalId = get_sub_group_local_id();\n" -"}"; +static const char *get_test_source = + "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" + "\n" + "typedef struct {\n" + " uint subGroupSize;\n" + " uint maxSubGroupSize;\n" + " uint numSubGroups;\n" + " uint enqNumSubGroups;\n" + " uint subGroupId;\n" + " uint subGroupLocalId;\n" + "} get_test_data;\n" + "\n" + "__kernel void get_test( __global get_test_data *outData )\n" + "{\n" + " int gid = get_global_id( 0 );\n" + " outData[gid].subGroupSize = get_sub_group_size();\n" + " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" + " outData[gid].numSubGroups = get_num_sub_groups();\n" + " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n" + " outData[gid].subGroupId = get_sub_group_id();\n" + " outData[gid].subGroupLocalId = get_sub_group_local_id();\n" + "}"; -static int -check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs) +static int check_group(const get_test_data *result, int nw, cl_uint ensg, + int maxwgs) { int first = -1; int last = -1; int i, j; cl_uint hit[32]; - for (i=0; i maxwgs) { - log_error("ERROR: get_max_subgroup_size() returned incorrect result: %u\n", result[first].maxSubGroupSize); + if (result[first].maxSubGroupSize == 0 + || result[first].maxSubGroupSize > maxwgs) + { + log_error( + "ERROR: get_max_subgroup_size() returned incorrect result: %u\n", + result[first].maxSubGroupSize); return -1; } - if (result[first].subGroupSize > result[first].maxSubGroupSize) { + if (result[first].subGroupSize > result[first].maxSubGroupSize) + { log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n"); return -1; } - if (result[last].subGroupSize > result[first].subGroupSize) { + if (result[last].subGroupSize > result[first].subGroupSize) + { log_error("ERROR: last sub group larger than first sub group\n"); return -1; } - if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg) { - log_error("ERROR: get_num_sub_groups() returned incorrect result: %u \n", result[first].numSubGroups); + if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg) + { + log_error( + "ERROR: get_num_sub_groups() returned incorrect result: %u \n", + result[first].numSubGroups); return -1; } memset(hit, 0, sizeof(hit)); - for (i=0; i= result[first].numSubGroups) { - log_error("ERROR: get_sub_group_id() returned out of range value: %u\n", result[i].subGroupId); + if (result[i].subGroupId >= result[first].numSubGroups) + { + log_error( + "ERROR: get_sub_group_id() returned out of range value: %u\n", + result[i].subGroupId); return -1; } - if (result[i].enqNumSubGroups != ensg) { - log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect value: %u\n", result[i].enqNumSubGroups); + if (result[i].enqNumSubGroups != ensg) + { + log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect " + "value: %u\n", + result[i].enqNumSubGroups); return -1; } - if (result[first].numSubGroups > 1) { - if (result[i].subGroupId < result[first].numSubGroups-1) { - if (result[i].subGroupSize != result[first].subGroupSize) { - log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); + if (result[first].numSubGroups > 1) + { + if (result[i].subGroupId < result[first].numSubGroups - 1) + { + if (result[i].subGroupSize != result[first].subGroupSize) + { + log_error( + "ERROR: unexpected variation in get_*_sub_group_*()\n"); return -1; } - if (result[i].subGroupLocalId >= result[first].subGroupSize) { - log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); - return -1; - } - } else { - if (result[i].subGroupSize != result[last].subGroupSize) { - log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); - return -1; - } - if (result[i].subGroupLocalId >= result[last].subGroupSize) { - log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); + if (result[i].subGroupLocalId >= result[first].subGroupSize) + { + log_error("ERROR: get_sub_group_local_id() returned out of " + "bounds value: %u \n", + result[i].subGroupLocalId); return -1; } } - } else { - if (result[i].subGroupSize != result[first].subGroupSize) { - log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); + else + { + if (result[i].subGroupSize != result[last].subGroupSize) + { + log_error( + "ERROR: unexpected variation in get_*_sub_group_*()\n"); + return -1; + } + if (result[i].subGroupLocalId >= result[last].subGroupSize) + { + log_error("ERROR: get_sub_group_local_id() returned out of " + "bounds value: %u \n", + result[i].subGroupLocalId); + return -1; + } + } + } + else + { + if (result[i].subGroupSize != result[first].subGroupSize) + { + log_error( + "ERROR: unexpected variation in get_*_sub_group_*()\n"); return -1; } - if (result[i].subGroupLocalId >= result[first].subGroupSize) { - log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); + if (result[i].subGroupLocalId >= result[first].subGroupSize) + { + log_error("ERROR: get_sub_group_local_id() returned out of " + "bounds value: %u \n", + result[i].subGroupLocalId); return -1; } } - j = (result[first].subGroupSize + 31)/32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5); - if (j < sizeof(hit)/4) { + j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId + + (result[i].subGroupLocalId >> 5); + if (j < sizeof(hit) / 4) + { cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU); - if ((hit[j] & b) != 0) { - log_error("ERROR: get_sub_group_local_id() repeated a result in the same sub group\n"); + if ((hit[j] & b) != 0) + { + log_error("ERROR: get_sub_group_local_id() repeated a result " + "in the same sub group\n"); return -1; } hit[j] |= b; @@ -160,8 +206,8 @@ check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs) return 0; } -int -test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +int test_work_item_functions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { static const size_t lsize = 200; int error; @@ -170,28 +216,28 @@ test_work_item_functions(cl_device_id device, cl_context context, cl_command_que cl_uint ensg; size_t global; size_t local; - get_test_data result[lsize*6]; + get_test_data result[lsize * 6]; clProgramWrapper program; clKernelWrapper kernel; clMemWrapper out; - error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &get_test_source, "get_test", "-cl-std=CL2.0"); - if (error != 0) - return error; + error = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &get_test_source, "get_test", + "-cl-std=CL2.0"); + if (error != 0) return error; error = get_max_allowed_work_group_size(context, kernel, &local, NULL); - if (error != 0) - return error; + if (error != 0) return error; maxwgs = (int)local; // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple subgroups - if (local > lsize) - local = lsize; + if (local > lsize) local = lsize; // Create our buffer - out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL, &error); + out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL, + &error); test_error(error, "clCreateBuffer failed"); // Set argument @@ -206,13 +252,16 @@ test_work_item_functions(cl_device_id device, cl_context context, cl_command_que // Collect the data memset((void *)&result, 0xf0, sizeof(result)); - error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL); + error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result), + (void *)&result, 0, NULL, NULL); test_error(error, "clEnqueueWriteBuffer failed"); - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, + NULL, NULL); test_error(error, "clEnqueueNDRangeKernel failed"); - error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), + (void *)&result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); error = clFinish(queue); @@ -223,30 +272,32 @@ test_work_item_functions(cl_device_id device, cl_context context, cl_command_que // Check the first group error = check_group(result, nw, ensg, maxwgs); - if (error) - return error; + if (error) return error; q = (int)global / nw; r = (int)global % nw; // Check the remaining work groups including the last if it is the same size - for (k=1; k