Initial open source release of OpenCL 2.2 CTS.

2026-03-21 06:49:02 +00:00 · 2017-05-16 18:25:37 +05:30
parent 6911ba5116
commit 2821bf1323
1035 changed files with 343518 additions and 0 deletions
--- a/test_conformance/clcpp/workgroups/CMakeLists.txt
+++ b/test_conformance/clcpp/workgroups/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(MODULE_NAME CPP_WORKGROUPS)  # also the prefix of the source-list variable below
+# Test entry point plus the shared harness sources it is built with.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+    ../../../test_common/harness/errorHelpers.c
+    ../../../test_common/harness/testHarness.c
+    ../../../test_common/harness/kernelHelpers.c
+    ../../../test_common/harness/msvc9.c
+    ../../../test_common/harness/parseParameters.cpp
+)
+# NOTE(review): CMakeCommon.txt presumably turns MODULE_NAME and the list above into the test target - confirm.
+include(../../CMakeCommon.txt)
--- a/test_conformance/clcpp/workgroups/common.hpp
+++ b/test_conformance/clcpp/workgroups/common.hpp
@@ -0,0 +1,97 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP
+
+#include <string>
+#include <vector>
+#include <limits>
+
+// Operators used to parameterise the work-group input/output generators.
+// Enumerator order (add, min, max) fixes the underlying values 0, 1, 2.
+enum class work_group_op : int { add, min, max };
+
+// Returns the lower-case name of `op` ("add", "min" or "max"); any other value
+// yields an empty string. Declared inline because this header defines it, so
+// including the header from several translation units must not break linking.
+inline std::string to_string(work_group_op op)
+{
+    switch (op)
+    {
+        case work_group_op::add: return "add";
+        case work_group_op::min: return "min";
+        case work_group_op::max: return "max";
+        default: break;
+    }
+    // Value outside the enumerator set.
+    return "";
+}
+
+// Builds `count` host-side input values for the work-group operation `op`:
+//   add - every element is 1;
+//   min - a counter that starts at wg_size, drops by one per element and is
+//         reset to wg_size whenever it reaches 0;
+//   max - a counter that starts at 0, grows by one per element and is reset
+//         to 0 whenever it reaches wg_size.
+// Counter values are converted to CL_INT_TYPE with static_cast.
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_input(size_t count, size_t wg_size)
+{
+    std::vector<CL_INT_TYPE> values(count, CL_INT_TYPE(1));
+
+    // work_group_op::add keeps the all-ones fill unchanged.
+    if (op == work_group_op::min)
+    {
+        // Descending ramp: wg_size, wg_size - 1, ..., 1, then start over.
+        size_t remaining = wg_size;
+        for (auto &&value : values)
+        {
+            value = static_cast<CL_INT_TYPE>(remaining);
+            --remaining;
+            if (remaining == 0) remaining = wg_size;
+        }
+    }
+    else if (op == work_group_op::max)
+    {
+        // Ascending ramp: 0, 1, ..., wg_size - 1, then start over.
+        size_t position = 0;
+        for (auto &&value : values)
+        {
+            value = static_cast<CL_INT_TYPE>(position);
+            ++position;
+            if (position == wg_size) position = 0;
+        }
+    }
+
+    return values;
+}
+
+// Builds the initial contents of the `count`-element host output buffer for
+// `op`: 0 for add, numeric_limits<CL_INT_TYPE>::max() for min and
+// numeric_limits<CL_INT_TYPE>::min() for max. wg_size is accepted but unused.
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_output(size_t count, size_t wg_size)
+{
+    // Parenthesised calls keep function-like min/max macros from expanding.
+    CL_INT_TYPE fill = CL_INT_TYPE(0);
+    if (op == work_group_op::min)
+        fill = (std::numeric_limits<CL_INT_TYPE>::max)();
+    else if (op == work_group_op::max)
+        fill = (std::numeric_limits<CL_INT_TYPE>::min)();
+    return std::vector<CL_INT_TYPE>(count, fill);
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP
--- a/test_conformance/clcpp/workgroups/main.cpp
+++ b/test_conformance/clcpp/workgroups/main.cpp
@@ -0,0 +1,34 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_wg_all.hpp"
+#include "test_wg_any.hpp"
+#include "test_wg_broadcast.hpp"
+#include "test_wg_reduce.hpp"
+#include "test_wg_scan_inclusive.hpp"
+#include "test_wg_scan_exclusive.hpp"
+
+int main(int argc, const char *argv[])
+{
+    // Test functions returned by the autotest suite
+    std::vector<basefn> tests = autotest::test_suite::get_test_functions();
+    // Names of those test functions
+    std::vector<std::string> names = autotest::test_suite::get_test_names();
+    // C-string pointers to the names, in the form runTestHarness takes them
+    std::vector<const char *> name_ptrs = autotest::get_strings_ptrs(names);
+    return runTestHarness(argc, argv, tests.size(), tests.data(), name_ptrs.data(), false, false, 0);
+}
--- a/test_conformance/clcpp/workgroups/test_wg_all.hpp
+++ b/test_conformance/clcpp/workgroups/test_wg_all.hpp
@@ -0,0 +1,218 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// Source of the test_wg_all kernel: every work-item evaluates
+// input[tid] < input[tid+1], passes it to work_group_all() and writes 1 (true) or
+// 0 (false) to output[tid]. The OpenCL C variant is for CTS development builds only.
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_wg_all_kernel_code()
+{
+    return
+        "__kernel void test_wg_all(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "\n"
+        "    int result = work_group_all(input[tid] < input[tid+1]);\n"
+        "    if(result == 0) {\n        output[tid] = 0;\n        return;\n    }\n"
+        "    output[tid] = 1;\n"
+        "}\n";
+}
+#else // OpenCL C++ kernel (default build)
+std::string generate_wg_all_kernel_code()
+{
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    bool result = work_group_all(input[tid] < input[tid+1]);\n"
+           "    if(!result) {\n        output[tid] = 0;\n        return;\n    }\n"
+           "    output[tid] = 1;\n"
+           "}\n";
+}
+#endif // DEVELOPMENT && USE_OPENCLC_KERNELS
+
+// Host-side reference check for the test_wg_all kernel.
+// `in` must hold at least count + 1 elements because element i is compared with
+// element i + 1; `out` holds the `count` values written by the kernel. For each
+// run of wg_size elements the expected value is 1 when in[k] < in[k + 1] holds
+// for every element of the run and 0 otherwise. Returns CL_SUCCESS when all
+// outputs match and -1 (after logging the first mismatch) otherwise.
+int verify_wg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size)
+{
+    for (size_t group_start = 0; group_start < count; group_start += wg_size)
+    {
+        // The last run may be shorter than wg_size.
+        const size_t group_end = group_start + (std::min)(wg_size, count - group_start);
+        bool all = true;
+        for (size_t k = group_start; all && k < group_end; k++)
+        {
+            all = in[k] < in[k + 1];
+        }
+        const cl_uint expected = all ? 1 : 0;
+
+        for (size_t k = group_start; k < group_end; k++)
+        {
+            if (expected == out[k]) continue;
+            // %lu expects unsigned long; size_t can be a different type
+            // (e.g. 64-bit Windows), so convert the arguments explicitly.
+            log_info(
+                "work_group_all %s: Error at %lu: expected = %lu, got = %lu\n",
+                type_name<cl_uint>().c_str(),
+                static_cast<unsigned long>(k),
+                static_cast<unsigned long>(expected),
+                static_cast<unsigned long>(out[k]));
+            return -1;
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds `count` input values for the work_group_all test. Element i holds i,
+// except that for i > count/2 the element at which the per-group countdown equals
+// wg_size/2 repeats its predecessor. At that pair input[tid] < input[tid+1] is
+// false, so work_group_all() should return false for the affected work-groups.
+std::vector<cl_uint> generate_input_wg_all(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    size_t countdown = wg_size; // runs wg_size, wg_size - 1, ..., 1 and restarts
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        if ((countdown == wg_size / 2) && (idx > count / 2))
+        {
+            values[idx] = values[idx - 1]; // idx > count/2 guarantees idx >= 1
+        }
+        else
+        {
+            values[idx] = static_cast<cl_uint>(idx);
+        }
+        countdown = (countdown - 1 == 0) ? wg_size : countdown - 1;
+    }
+    return values;
+}
+
+std::vector<cl_uint> generate_output_wg_all(size_t count, size_t wg_size)
+{
+    // Every output element starts as 1; wg_size is accepted but not used.
+    static_cast<void>(wg_size);
+    return std::vector<cl_uint>(count, static_cast<cl_uint>(1));
+}
+
+// Runs the work_group_all test: builds the test_wg_all kernel, launches it over
+// `count` work-items rounded up to whole work-groups of CL_KERNEL_WORK_GROUP_SIZE,
+// reads the results back and checks them with verify_wg_all().
+// NOTE(review): the RETURN_ON_* macros presumably return early without releasing
+// the OpenCL objects created so far - confirm.
+int work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_all_kernel_code();
+// Development-only build variants follow (they require DEVELOPMENT to be defined).
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Global work size: count rounded up to a multiple of wg_size, in integer arithmetic.
+    const size_t wg_number = count / wg_size + (count % wg_size != 0 ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element: work-item tid also reads input[tid + 1].
+    std::vector<cl_uint> input = generate_input_wg_all(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_wg_all(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Verification only needs the host vectors; releasing first avoids a leak when it fails.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    if (verify_wg_all(input, output, flat_work_size, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_all failed");
+    }
+    log_info("work_group_all passed\n");
+    return err;
+}
+
+// Test case declared through AUTO_TEST_CASE; n_elems is forwarded to
+// work_group_all() as the element count.
+AUTO_TEST_CASE(test_work_group_all)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int err = work_group_all(device, context, queue, n_elems);
+    CHECK_ERROR(err)
+
+    // Collapse every non-success code to -1.
+    const bool failed = (err != CL_SUCCESS);
+    return failed ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
--- a/test_conformance/clcpp/workgroups/test_wg_any.hpp
+++ b/test_conformance/clcpp/workgroups/test_wg_any.hpp
@@ -0,0 +1,218 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// Source of the test_wg_any kernel: every work-item evaluates
+// input[tid] == input[tid+1], passes it to work_group_any() and writes 1 (true) or
+// 0 (false) to output[tid]. The OpenCL C variant is for CTS development builds only.
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_wg_any_kernel_code()
+{
+    return
+        "__kernel void test_wg_any(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "\n"
+        "    int result = work_group_any(input[tid] == input[tid+1]);\n"
+        "    if(result == 0) {\n        output[tid] = 0;\n        return;\n    }\n"
+        "    output[tid] = 1;\n"
+        "}\n";
+}
+#else // OpenCL C++ kernel (default build)
+std::string generate_wg_any_kernel_code()
+{
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_any(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    bool result = work_group_any(input[tid] == input[tid+1]);\n"
+           "    if(!result) {\n        output[tid] = 0;\n        return;\n    }\n"
+           "    output[tid] = 1;\n"
+           "}\n";
+}
+#endif // DEVELOPMENT && USE_OPENCLC_KERNELS
+
+// Host-side reference check for the test_wg_any kernel.
+// `in` must hold at least count + 1 elements because element i is compared with
+// element i + 1; `out` holds the `count` values written by the kernel. For each
+// run of wg_size elements the expected value is 1 when in[k] == in[k + 1] holds
+// for at least one element of the run and 0 otherwise. Returns CL_SUCCESS when
+// all outputs match and -1 (after logging the first mismatch) otherwise.
+int verify_wg_any(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size)
+{
+    for (size_t group_start = 0; group_start < count; group_start += wg_size)
+    {
+        // The last run may be shorter than wg_size.
+        const size_t group_end = group_start + (std::min)(wg_size, count - group_start);
+        bool any = false;
+        for (size_t k = group_start; !any && k < group_end; k++)
+        {
+            any = (in[k] == in[k + 1]);
+        }
+        const cl_uint expected = any ? 1 : 0;
+
+        for (size_t k = group_start; k < group_end; k++)
+        {
+            if (expected == out[k]) continue;
+            // %lu expects unsigned long; size_t can be a different type
+            // (e.g. 64-bit Windows), so convert the arguments explicitly.
+            log_info(
+                "work_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
+                type_name<cl_uint>().c_str(),
+                static_cast<unsigned long>(k),
+                static_cast<unsigned long>(expected),
+                static_cast<unsigned long>(out[k]));
+            return -1;
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds `count` input values for the work_group_any test. Element i holds i,
+// except that for i > count/2 the element at which the per-group countdown equals
+// wg_size/2 repeats its predecessor. At that pair input[tid] == input[tid+1] is
+// true, so work_group_any() should return true for the affected work-groups.
+std::vector<cl_uint> generate_input_wg_any(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    size_t countdown = wg_size; // runs wg_size, wg_size - 1, ..., 1 and restarts
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        if ((countdown == wg_size / 2) && (idx > count / 2))
+        {
+            values[idx] = values[idx - 1]; // idx > count/2 guarantees idx >= 1
+        }
+        else
+        {
+            values[idx] = static_cast<cl_uint>(idx);
+        }
+        countdown = (countdown - 1 == 0) ? wg_size : countdown - 1;
+    }
+    return values;
+}
+
+std::vector<cl_uint> generate_output_wg_any(size_t count, size_t wg_size)
+{
+    // Every output element starts as 1; wg_size is accepted but not used.
+    static_cast<void>(wg_size);
+    return std::vector<cl_uint>(count, static_cast<cl_uint>(1));
+}
+
+// Runs the work_group_any test: builds the test_wg_any kernel, launches it over
+// `count` work-items rounded up to whole work-groups of CL_KERNEL_WORK_GROUP_SIZE,
+// reads the results back and checks them with verify_wg_any().
+// NOTE(review): the RETURN_ON_* macros presumably return early without releasing
+// the OpenCL objects created so far - confirm.
+int work_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_any_kernel_code();
+// Development-only build variants follow (they require DEVELOPMENT to be defined).
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Global work size: count rounded up to a multiple of wg_size, in integer arithmetic.
+    const size_t wg_number = count / wg_size + (count % wg_size != 0 ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element: work-item tid also reads input[tid + 1].
+    std::vector<cl_uint> input = generate_input_wg_any(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_wg_any(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Verification only needs the host vectors; releasing first avoids a leak when it fails.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    if (verify_wg_any(input, output, flat_work_size, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_any failed");
+    }
+    log_info("work_group_any passed\n");
+    return err;
+}
+
+// Test case declared through AUTO_TEST_CASE; n_elems is forwarded to
+// work_group_any() as the element count.
+AUTO_TEST_CASE(test_work_group_any)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int err = work_group_any(device, context, queue, n_elems);
+    CHECK_ERROR(err)
+
+    // Collapse every non-success code to -1.
+    const bool failed = (err != CL_SUCCESS);
+    return failed ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP
--- a/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp
+++ b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp
@@ -0,0 +1,458 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// Kernel sources for the 1D, 2D and 3D work_group_broadcast tests. In each kernel
+// the broadcast source is the work-item whose local id in every dimension is
+// get_group_id(d) % get_local_size(d). The OpenCL C variants are development-only.
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_wg_broadcast_1D_kernel_code()
+{
+    return
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
+        "    output[tid] = result;\n"
+        "}\n";
+}
+std::string generate_wg_broadcast_2D_kernel_code()
+{
+    return
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+}
+std::string generate_wg_broadcast_3D_kernel_code()
+{
+    return
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    ulong tid_z = get_global_id(2);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t z = get_group_id(2) % get_local_size(2);\n"
+        "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+}
+#else // OpenCL C++ kernels (default build)
+std::string generate_wg_broadcast_1D_kernel_code()
+{
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
+           "    output[tid] = result;\n"
+           "}\n";
+}
+std::string generate_wg_broadcast_2D_kernel_code()
+{
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid_x = get_global_id(0);\n"
+           "    ulong tid_y = get_global_id(1);\n"
+           "    size_t x = get_group_id(0) % get_local_size(0);\n"
+           "    size_t y = get_group_id(1) % get_local_size(1);\n"
+           "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
+           "    uint result = work_group_broadcast(input[idx], x, y);\n"
+           "    output[idx] = result;\n"
+           "}\n";
+}
+std::string generate_wg_broadcast_3D_kernel_code()
+{
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid_x = get_global_id(0);\n"
+           "    ulong tid_y = get_global_id(1);\n"
+           "    ulong tid_z = get_global_id(2);\n"
+           "    size_t x = get_group_id(0) % get_local_size(0);\n"
+           "    size_t y = get_group_id(1) % get_local_size(1);\n"
+           "    size_t z = get_group_id(2) % get_local_size(2);\n"
+           "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
+           "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
+           "    output[idx] = result;\n"
+           "}\n";
+}
+#endif // DEVELOPMENT && USE_OPENCLC_KERNELS
+
+// Host-side reference check for the 1D test_wg_broadcast kernel: every element
+// of group g (a run of up to wg_size elements of `out`) must equal the input of
+// the work-item whose local id is g % local_size. Returns CL_SUCCESS or -1.
+int
+verify_wg_broadcast_1D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t n, size_t wg_size)
+{
+    for (size_t i = 0, group_id = 0; i < n; i += wg_size, group_id++)
+    {
+        // size_t (not int) keeps the index arithmetic and comparisons unsigned.
+        const size_t local_size = (std::min)(wg_size, n - i);
+        const cl_uint broadcast_result = in[i + (group_id % local_size)];
+        for (size_t j = 0; j < local_size; j++)
+        {
+            if (broadcast_result == out[i + j]) continue;
+            // %lu needs unsigned long; size_t may be a different type.
+            log_info("work_group_broadcast: Error at %lu: expected = %u, got = %u\n",
+                     static_cast<unsigned long>(i + j), broadcast_result, out[i + j]);
+            return -1;
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for the 2D test_wg_broadcast kernel. The nx * ny grid
+// is row-major (index = row * nx + column) and is walked in wg_size_x * wg_size_y
+// groups; every output of a group must equal the input at local id
+// (group_id_x % wg_size_x, group_id_y % wg_size_y). Returns CL_SUCCESS or -1.
+int verify_wg_broadcast_2D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
+                           size_t nx, size_t ny, size_t wg_size_x, size_t wg_size_y)
+{
+    for (size_t i = 0, group_id_y = 0; i < ny; i += wg_size_y, group_id_y++)
+    {
+        const size_t y = group_id_y % wg_size_y;
+        const size_t local_size_y = (std::min)(wg_size_y, ny - i);
+        for (size_t ly = 0; ly < local_size_y; ly++)
+        {
+            for (size_t j = 0, group_id_x = 0; j < nx; j += wg_size_x, group_id_x++)
+            {
+                const size_t x = group_id_x % wg_size_x;
+                const size_t local_size_x = (std::min)(wg_size_x, nx - j);
+                const size_t src = (i + y) * nx + (j + x);
+                const cl_uint broadcast_result = in[src];
+                for (size_t lx = 0; lx < local_size_x; lx++)
+                {
+                    const size_t indx = (i + ly) * nx + (j + lx);
+                    if (broadcast_result == out[indx]) continue;
+                    // %lu needs unsigned long; size_t may be a different type.
+                    log_info("%lu\n", static_cast<unsigned long>(indx));
+                    log_info("%lu\n", static_cast<unsigned long>(src));
+                    log_info("%lu\n", static_cast<unsigned long>(out.size()));
+                    log_info("work_group_broadcast: Error at (%lu, %lu): expected = %u, got = %u\n",
+                             static_cast<unsigned long>(j + lx), static_cast<unsigned long>(i + ly),
+                             broadcast_result, out[indx]);
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for the 3D test_wg_broadcast kernel. Elements are
+// indexed as z * ny * nx + y * nx + x and walked in wg_size_x * wg_size_y *
+// wg_size_z groups; every output of a group must equal the input at local id
+// (group_id % wg_size) taken per dimension. Returns CL_SUCCESS or -1.
+int verify_wg_broadcast_3D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
+                           size_t nx, size_t ny, size_t nz,
+                           size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
+{
+    for (size_t i = 0, group_id_z = 0; i < nz; i += wg_size_z, group_id_z++)
+    {
+        const size_t z = group_id_z % wg_size_z;
+        const size_t local_size_z = (std::min)(wg_size_z, nz - i);
+        for (size_t lz = 0; lz < local_size_z; lz++)
+        {
+            for (size_t j = 0, group_id_y = 0; j < ny; j += wg_size_y, group_id_y++)
+            {
+                const size_t y = group_id_y % wg_size_y;
+                const size_t local_size_y = (std::min)(wg_size_y, ny - j);
+                for (size_t ly = 0; ly < local_size_y; ly++)
+                {
+                    for (size_t k = 0, group_id_x = 0; k < nx; k += wg_size_x, group_id_x++)
+                    {
+                        const size_t x = group_id_x % wg_size_x;
+                        const size_t local_size_x = (std::min)(wg_size_x, nx - k);
+                        // A z-slice holds ny * nx elements (same layout as indx below);
+                        // a ny * nz stride reads the wrong element whenever nx != nz.
+                        const cl_uint broadcast_result = in[(i + z) * ny * nx + (j + y) * nx + (k + x)];
+                        for (size_t lx = 0; lx < local_size_x; lx++)
+                        {
+                            const size_t indx = (i + lz) * ny * nx + (j + ly) * nx + (k + lx);
+                            if (broadcast_result == out[indx]) continue;
+                            log_info(
+                                "work_group_broadcast: Error at (%lu, %lu, %lu): expected = %u, got = %u\n",
+                                static_cast<unsigned long>(k + lx), static_cast<unsigned long>(j + ly),
+                                static_cast<unsigned long>(i + lz), broadcast_result, out[indx]);
+                            return -1;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds `count` input values for the work_group_broadcast tests: a countdown
+// that starts at wg_size, drops by one per element and is reset to wg_size
+// whenever it reaches 0.
+std::vector<cl_uint> generate_input_wg_broadcast(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    size_t countdown = wg_size;
+    for (auto &value : values)
+    {
+        value = static_cast<cl_uint>(countdown);
+        if (--countdown == 0)
+            countdown = wg_size;
+    }
+    return values;
+}
+
+std::vector<cl_uint> generate_output_wg_broadcast(size_t count, size_t wg_size)
+{
+    // Every output element starts as 1; wg_size is accepted but not used.
+    static_cast<void>(wg_size);
+    return std::vector<cl_uint>(count, static_cast<cl_uint>(1));
+}
+
+// Runs the work_group_broadcast test kernel on a dim-dimensional NDRange (dim > 2
+// selects the 3D variant, dim > 1 the 2D variant, anything else the 1D variant) and
+// checks the device output with the matching host-side verifier.
+// count is the requested element count: it is divided by 3 (3D) or 2 (2D), rounded up
+// to whole work-groups, and that work-group count is used along every dimension.
+// Errors are reported and returned through the RETURN_ON_* macros; on the success
+// path the returned value is the status of the last OpenCL call.
+int work_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, size_t dim)
+{
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output
+    cl_program program;
+    cl_kernel kernel;
+    size_t flat_wg_size;
+    size_t wg_size[] = { 1, 1, 1};
+    size_t work_size[] = { 1, 1, 1};
+    int err;
+
+    // Get kernel source code
+    std::string code_str;
+    if(dim > 2) code_str = generate_wg_broadcast_3D_kernel_code();
+    else if(dim > 1) code_str = generate_wg_broadcast_2D_kernel_code();
+    else code_str = generate_wg_broadcast_1D_kernel_code();
+
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
+    RETURN_ON_ERROR(err)
+#endif
+
+    // Get max flat workgroup size
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &flat_wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Set local work size. The 1D case uses the reported maximum as-is.
+    wg_size[0] = flat_wg_size;
+    // 3D: a cube with 8, 4, 2 or 1 work-items per side, selected by fixed
+    // thresholds on the maximum work-group size.
+    if(dim > 2)
+    {
+        if (flat_wg_size >= 512)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 8;
+        }
+        else if (flat_wg_size >= 64)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 4;
+        }
+        else if (flat_wg_size >= 8)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 2;
+        }
+        else
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 1;
+        }
+    }
+    // 2D: a square with 16, 8, 4 or 1 work-items per side, selected the same way.
+    else if(dim > 1)
+    {
+        if (flat_wg_size >= 256)
+        {
+            wg_size[0] = wg_size[1] = 16;
+        }
+        else if (flat_wg_size >= 64)
+        {
+            wg_size[0] = wg_size[1] = 8;
+        }
+        else if (flat_wg_size >= 16)
+        {
+            wg_size[0] = wg_size[1] = 4;
+        }
+        else
+        {
+            wg_size[0] = wg_size[1] = 1;
+        }
+    }
+
+    // Calculate flat local work size
+    flat_wg_size = wg_size[0];
+    if(dim > 1) flat_wg_size *= wg_size[1];
+    if(dim > 2) flat_wg_size *= wg_size[2];
+
+    // Calculate global work size. The element count is scaled down by the number of
+    // dimensions and rounded up to whole work-groups with integer ceiling division;
+    // at this point flat_wg_size is the number of work-items in one work-group.
+    size_t scaled_count = count;
+    if(dim > 2) scaled_count = count / 3;
+    else if(dim > 1) scaled_count = count / 2;
+    size_t wg_number = scaled_count / flat_wg_size + (scaled_count % flat_wg_size != 0 ? 1 : 0);
+
+    // The same number of work-groups is used along every active dimension; the
+    // remaining entries of work_size keep their initial value of 1.
+    work_size[0] = wg_number * wg_size[0];
+    if(dim > 1) work_size[1] = wg_number * wg_size[1];
+    if(dim > 2) work_size[2] = wg_number * wg_size[2];
+    size_t flat_work_size = work_size[0] * work_size[1] * work_size[2];
+
+    // Host-side contents of the input and output buffers
+    std::vector<cl_uint> input = generate_input_wg_broadcast(flat_work_size, flat_wg_size);
+    std::vector<cl_uint> output = generate_output_wg_broadcast(flat_work_size, flat_wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Upload the input data (blocking write)
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    // Kernel arguments: 0 = input buffer, 1 = output buffer
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // Launch the kernel with the global and local sizes computed above
+    err = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Download the results (blocking read)
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    int result = CL_SUCCESS;
+    // 3D
+    if(dim > 2)
+    {
+        result = verify_wg_broadcast_3D(
+            input, output,
+            work_size[0], work_size[1], work_size[2],
+            wg_size[0], wg_size[1], wg_size[2]
+        );
+    }
+    // 2D
+    else if(dim > 1)
+    {
+        result = verify_wg_broadcast_2D(
+            input, output,
+            work_size[0], work_size[1],
+            wg_size[0], wg_size[1]
+        );
+    }
+    // 1D
+    else
+    {
+        result = verify_wg_broadcast_1D(
+            input, output,
+            work_size[0],
+            wg_size[0]
+        );
+    }
+
+    // The blocking read above already copied the results to the host, so release the
+    // OpenCL objects before reporting; a failed verification must not skip the cleanup.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    // dim is cast to match %lu, which expects unsigned long rather than size_t.
+    RETURN_ON_ERROR_MSG(result, "work_group_broadcast_%luD failed", static_cast<unsigned long>(dim));
+    log_info("work_group_broadcast_%luD passed\n", static_cast<unsigned long>(dim));
+    return err;
+}
+
+AUTO_TEST_CASE(test_work_group_broadcast)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the broadcast test with 1, 2 and 3 dimensions in turn. The three
+    // results are OR-ed together, so any failure fails the whole test case.
+    int combined = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_broadcast(device, context, queue, n_elems, 1);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_broadcast(device, context, queue, n_elems, 2);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_broadcast(device, context, queue, n_elems, 3);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
--- a/test_conformance/clcpp/workgroups/test_wg_reduce.hpp
+++ b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp
@@ -0,0 +1,331 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant: source of the test_wg_reduce kernel for CL_INT_TYPE and op.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_reduce_kernel_code()
+{
+    const std::string elem = type_name<CL_INT_TYPE>();
+    std::string src;
+    src += "__kernel void test_wg_reduce(global " + elem + " *input, global " + elem + " *output)\n";
+    src += "{\n    ulong tid = get_global_id(0);\n\n";
+    src += "    " + elem + " result = work_group_reduce_" + to_string(op) + "(input[tid]);\n";
+    src += "    output[tid] = result;\n}\n";
+    return src;
+}
+#else
+// OpenCL C++ variant: source of the test_wg_reduce kernel for CL_INT_TYPE and op.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_reduce_kernel_code()
+{
+    const std::string elem = type_name<CL_INT_TYPE>();
+    std::string src = "#include <opencl_memory>\n";
+    src += "#include <opencl_work_item>\n";
+    src += "#include <opencl_work_group>\n";
+    src += "using namespace cl;\n";
+    src += "__kernel void test_wg_reduce(global_ptr<" + elem + "[]> input, global_ptr<" + elem + "[]> output)\n";
+    src += "{\n    ulong tid = get_global_id(0);\n";
+    src += "    " + elem + " result = work_group_reduce<work_group_op::" + to_string(op) + ">(input[tid]);\n";
+    src += "    output[tid] = result;\n}\n";
+    return src;
+}
+#endif
+
+// Host-side check for the add reduction. `in` is processed in consecutive chunks
+// of wg_size elements (the last chunk may be shorter), one chunk per work-group;
+// every element of `out` within a chunk must equal the sum of that chunk of `in`.
+// Logs the first mismatch and returns -1; returns 0 when everything matches.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        const size_t n = std::min<size_t>(wg_size, in.size() - i); // items in this chunk
+        CL_INT_TYPE sum = 0;
+        for (size_t j = 0; j < n; j++)
+            sum += in[i + j];
+
+        // Check if all work-items in work-group stored correct value
+        for (size_t j = 0; j < n; j++)
+        {
+            if (sum == out[i + j]) continue;
+            // %llu with casts: %lu is too narrow for size_t / 64-bit values where long is 32 bits.
+            log_info(
+                "work_group_reduce_add %s: Error at %llu: expected = %llu, got = %llu\n",
+                type_name<CL_INT_TYPE>().c_str(), static_cast<unsigned long long>(i + j),
+                static_cast<unsigned long long>(sum), static_cast<unsigned long long>(out[i + j]));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Host-side check for the min reduction. `in` is processed in consecutive chunks
+// of wg_size elements (the last chunk may be shorter), one chunk per work-group;
+// every element of `out` within a chunk must equal the minimum of that chunk of `in`.
+// Logs the first mismatch and returns -1; returns 0 when everything matches.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        const size_t n = std::min<size_t>(wg_size, in.size() - i); // items in this chunk
+        CL_INT_TYPE lowest = (std::numeric_limits<CL_INT_TYPE>::max)();
+        for (size_t j = 0; j < n; j++)
+            lowest = std::min<CL_INT_TYPE>(lowest, in[i + j]);
+
+        // Check if all work-items in work-group stored correct value
+        for (size_t j = 0; j < n; j++)
+        {
+            if (lowest == out[i + j]) continue;
+            // %llu with casts: %lu is too narrow for size_t / 64-bit values where long is 32 bits.
+            log_info(
+                "work_group_reduce_min %s: Error at %llu: expected = %llu, got = %llu\n",
+                type_name<CL_INT_TYPE>().c_str(), static_cast<unsigned long long>(i + j),
+                static_cast<unsigned long long>(lowest), static_cast<unsigned long long>(out[i + j]));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Host-side check for the max reduction. `in` is processed in consecutive chunks
+// of wg_size elements (the last chunk may be shorter), one chunk per work-group;
+// every element of `out` within a chunk must equal the maximum of that chunk of `in`.
+// Logs the first mismatch and returns -1; returns 0 when everything matches.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        const size_t n = std::min<size_t>(wg_size, in.size() - i); // items in this chunk
+        CL_INT_TYPE highest = (std::numeric_limits<CL_INT_TYPE>::min)();
+        for (size_t j = 0; j < n; j++)
+            highest = std::max<CL_INT_TYPE>(highest, in[i + j]);
+
+        // Check if all work-items in work-group stored correct value
+        for (size_t j = 0; j < n; j++)
+        {
+            if (highest == out[i + j]) continue;
+            // %llu with casts: %lu is too narrow for size_t / 64-bit values where long is 32 bits.
+            log_info(
+                "work_group_reduce_max %s: Error at %llu: expected = %llu, got = %llu\n",
+                type_name<CL_INT_TYPE>().c_str(), static_cast<unsigned long long>(i + j),
+                static_cast<unsigned long long>(highest), static_cast<unsigned long long>(out[i + j]));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Forwards to the reduce verifier that matches the compile-time operation `op`;
+// yields -1 when `op` is none of add, min or max.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    if (op == work_group_op::add)
+        return verify_wg_reduce_add(in, out, wg_size);
+    if (op == work_group_op::min)
+        return verify_wg_reduce_min(in, out, wg_size);
+    if (op == work_group_op::max)
+        return verify_wg_reduce_max(in, out, wg_size);
+
+    return -1;
+}
+
+// Builds and runs the work_group_reduce test kernel for CL_INT_TYPE and `op`, then
+// checks the device output on the host. `count` is rounded up to a whole number of
+// work-groups of the kernel's CL_KERNEL_WORK_GROUP_SIZE. Returns CL_SUCCESS on
+// success or when the device does not support CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_reduce_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: count rounded up to a whole number of
+    // work-groups, using integer ceiling division.
+    size_t wg_number = count / wg_size + (count % wg_size != 0 ? 1 : 0);
+    size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+                               static_cast<void *>(input.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    err = clEnqueueReadBuffer(queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+                              static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // The blocking read above already copied the results to the host, so release the
+    // OpenCL objects before verifying; a failed verification must not skip the cleanup.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if (verify_wg_reduce<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    return err;
+}
+
+AUTO_TEST_CASE(test_work_group_reduce_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the add reduction for cl_int, cl_uint, cl_long and cl_ulong in turn.
+    // The per-type results are OR-ed together, so any failure fails the test case.
+    int combined = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_work_group_reduce_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the min reduction for cl_int, cl_uint, cl_long and cl_ulong in turn.
+    // The per-type results are OR-ed together, so any failure fails the test case.
+    int combined = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_work_group_reduce_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the max reduction for cl_int, cl_uint, cl_long and cl_ulong in turn.
+    // The per-type results are OR-ed together, so any failure fails the test case.
+    int combined = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
--- a/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp
+++ b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp
@@ -0,0 +1,324 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP
+
+#include <vector>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant: source of the test_wg_scan_exclusive kernel for CL_INT_TYPE and op.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_exclusive_kernel_code()
+{
+    const std::string elem = type_name<CL_INT_TYPE>();
+    std::string src;
+    src += "__kernel void test_wg_scan_exclusive(global " + elem + " *input, global " + elem + " *output)\n";
+    src += "{\n    ulong tid = get_global_id(0);\n\n";
+    src += "    " + elem + " result = work_group_scan_exclusive_" + to_string(op) + "(input[tid]);\n";
+    src += "    output[tid] = result;\n}\n";
+    return src;
+}
+#else
+// OpenCL C++ variant: source of the test_wg_scan_exclusive kernel for CL_INT_TYPE and op.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_exclusive_kernel_code()
+{
+    const std::string elem = type_name<CL_INT_TYPE>();
+    std::string src = "#include <opencl_memory>\n";
+    src += "#include <opencl_work_item>\n";
+    src += "#include <opencl_work_group>\n";
+    src += "using namespace cl;\n";
+    src += "__kernel void test_wg_scan_exclusive(global_ptr<" + elem + "[]> input, global_ptr<" + elem + "[]> output)\n";
+    src += "{\n    ulong tid = get_global_id(0);\n";
+    src += "    " + elem + " result = work_group_scan_exclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n";
+    src += "    output[tid] = result;\n}\n";
+    return src;
+}
+#endif
+
+// Host-side check for the exclusive add scan. Within each chunk of wg_size elements
+// of `in` (the last may be shorter), out[k] must equal the sum of the chunk's
+// elements before position k (0 for the first). Returns CL_SUCCESS or -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        const size_t n = std::min<size_t>(wg_size, in.size() - i); // items in this chunk
+        CL_INT_TYPE sum = 0; // sum of the chunk elements visited so far
+        for (size_t j = 0; j < n; j++)
+        {
+            if (sum != out[i + j])
+            {
+                // %llu with casts: %lu is too narrow for size_t / 64-bit values where long is 32 bits.
+                log_info(
+                    "work_group_scan_exclusive_add %s: Error at %llu: expected = %llu, got = %llu\n",
+                    type_name<CL_INT_TYPE>().c_str(), static_cast<unsigned long long>(i + j),
+                    static_cast<unsigned long long>(sum), static_cast<unsigned long long>(out[i + j]));
+                return -1;
+            }
+            sum += in[i + j];
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side check for the exclusive min scan. Within each chunk of wg_size elements
+// of `in` (the last may be shorter), out[k] must equal the minimum of the chunk's
+// elements before position k (the type's maximum for the first). Returns CL_SUCCESS or -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        const size_t n = std::min<size_t>(wg_size, in.size() - i); // items in this chunk
+        CL_INT_TYPE lowest = (std::numeric_limits<CL_INT_TYPE>::max)(); // minimum so far
+        for (size_t j = 0; j < n; j++)
+        {
+            if (lowest != out[i + j])
+            {
+                // %llu with casts: %lu is too narrow for size_t / 64-bit values where long is 32 bits.
+                log_info(
+                    "work_group_scan_exclusive_min %s: Error at %llu: expected = %llu, got = %llu\n",
+                    type_name<CL_INT_TYPE>().c_str(), static_cast<unsigned long long>(i + j),
+                    static_cast<unsigned long long>(lowest), static_cast<unsigned long long>(out[i + j]));
+                return -1;
+            }
+            lowest = std::min<CL_INT_TYPE>(lowest, in[i + j]);
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side check for the exclusive max scan. Within each chunk of wg_size elements
+// of `in` (the last may be shorter), out[k] must equal the maximum of the chunk's
+// elements before position k (the type's minimum for the first). Returns CL_SUCCESS or -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        const size_t n = std::min<size_t>(wg_size, in.size() - i); // items in this chunk
+        CL_INT_TYPE highest = (std::numeric_limits<CL_INT_TYPE>::min)(); // maximum so far
+        for (size_t j = 0; j < n; j++)
+        {
+            if (highest != out[i + j])
+            {
+                // %llu with casts: %lu is too narrow for size_t / 64-bit values where long is 32 bits.
+                log_info(
+                    "work_group_scan_exclusive_max %s: Error at %llu: expected = %llu, got = %llu\n",
+                    type_name<CL_INT_TYPE>().c_str(), static_cast<unsigned long long>(i + j),
+                    static_cast<unsigned long long>(highest), static_cast<unsigned long long>(out[i + j]));
+                return -1;
+            }
+            highest = std::max<CL_INT_TYPE>(highest, in[i + j]);
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Forwards to the exclusive-scan verifier that matches the compile-time
+// operation `op`; yields -1 when `op` is none of add, min or max.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_scan_exclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    if (op == work_group_op::add)
+        return verify_wg_scan_exclusive_add(in, out, wg_size);
+    if (op == work_group_op::min)
+        return verify_wg_scan_exclusive_min(in, out, wg_size);
+    if (op == work_group_op::max)
+        return verify_wg_scan_exclusive_max(in, out, wg_size);
+
+    return -1;
+}
+
+// Builds and runs the work_group_scan_exclusive test kernel for CL_INT_TYPE and `op`,
+// then checks the device output on the host. `count` is rounded up to a whole number
+// of work-groups of the kernel's CL_KERNEL_WORK_GROUP_SIZE. Returns CL_SUCCESS on
+// success or when the device does not support CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_scan_exclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: count rounded up to a whole number of
+    // work-groups, using integer ceiling division.
+    size_t wg_number = count / wg_size + (count % wg_size != 0 ? 1 : 0);
+    size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+                               static_cast<void *>(input.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    err = clEnqueueReadBuffer(queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+                              static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // The blocking read above already copied the results to the host, so release the
+    // OpenCL objects before verifying; a failed verification must not skip the cleanup.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if (verify_wg_scan_exclusive<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    return err;
+}
+
+AUTO_TEST_CASE(test_work_group_scan_exclusive_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the exclusive add scan for cl_int, cl_uint, cl_long and cl_ulong in turn.
+    // The per-type results are OR-ed together, so any failure fails the test case.
+    int combined = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_work_group_scan_exclusive_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the exclusive min scan for cl_int, cl_uint, cl_long and cl_ulong in turn.
+    // The per-type results are OR-ed together, so any failure fails the test case.
+    int combined = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_work_group_scan_exclusive_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the exclusive max scan for cl_int, cl_uint, cl_long and cl_ulong in turn.
+    // The per-type results are OR-ed together, so any failure fails the test case.
+    int combined = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    combined |= local_error;
+
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP
--- a/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp
+++ b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp
@@ -0,0 +1,324 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of the test_wg_scan_inclusive kernel for the given
+// element type and operation. Each work-item reads input[get_global_id(0)], passes it
+// to work_group_scan_inclusive_<op> and stores the result at the same index of output.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_inclusive_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+    const std::string scan_call = "work_group_scan_inclusive_" + to_string(op) + "(input[tid])";
+
+    std::string source;
+    source += "__kernel void test_wg_scan_inclusive(global " + elem_type + " *input, global " + elem_type + " *output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "\n";
+    source += "    " + elem_type + " result = " + scan_call + ";\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+#else
+// Builds the OpenCL C++ source of the test_wg_scan_inclusive kernel for the given
+// element type and operation. Each work-item reads input[get_global_id(0)], passes it
+// to work_group_scan_inclusive<work_group_op::<op>> and stores the result at the same
+// index of output.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_inclusive_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+    const std::string buffer_type = "global_ptr<" + elem_type + "[]>";
+
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_wg_scan_inclusive(" + buffer_type + " input, " + buffer_type + " output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = work_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Host-side check of the results of an inclusive work-group scan with the add operation.
+//
+// in      - values that were given to the kernel
+// out     - values produced by the kernel, indexed like in
+// wg_size - number of consecutive elements that form one work-group
+//
+// in is processed in chunks of wg_size elements (the last chunk may be shorter). Within a
+// chunk, out[k] must equal the sum of the chunk's elements up to and including position k.
+// Returns CL_SUCCESS when every element matches; on the first mismatch the position,
+// expected value and actual value are logged and -1 is returned.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // The last work-group may be cut short by the end of the input
+        const size_t group_size = (std::min)(wg_size, in.size() - i);
+        CL_INT_TYPE sum = 0;
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < group_size; j++)
+        {
+            sum += in[i + j];
+            if (sum != out[i + j])
+            {
+                // Values are formatted with std::to_string so signed types keep their sign and
+                // 64-bit types are not truncated; the index is widened to match %llu exactly.
+                log_info(
+                    "work_group_scan_inclusive_add %s: Error at %llu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    static_cast<unsigned long long>(i + j),
+                    std::to_string(sum).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side check of the results of an inclusive work-group scan with the min operation.
+//
+// in      - values that were given to the kernel
+// out     - values produced by the kernel, indexed like in
+// wg_size - number of consecutive elements that form one work-group
+//
+// in is processed in chunks of wg_size elements (the last chunk may be shorter). Within a
+// chunk, out[k] must equal the smallest of the chunk's elements up to and including
+// position k. Returns CL_SUCCESS when every element matches; on the first mismatch the
+// position, expected value and actual value are logged and -1 is returned.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // The last work-group may be cut short by the end of the input
+        const size_t group_size = (std::min)(wg_size, in.size() - i);
+        // Start from the largest representable value so the first element always replaces it
+        CL_INT_TYPE running_min = (std::numeric_limits<CL_INT_TYPE>::max)();
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < group_size; j++)
+        {
+            running_min = (std::min)(running_min, in[i + j]);
+            if (running_min != out[i + j])
+            {
+                // Values are formatted with std::to_string so signed types keep their sign and
+                // 64-bit types are not truncated; the index is widened to match %llu exactly.
+                log_info(
+                    "work_group_scan_inclusive_min %s: Error at %llu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    static_cast<unsigned long long>(i + j),
+                    std::to_string(running_min).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side check of the results of an inclusive work-group scan with the max operation.
+//
+// in      - values that were given to the kernel
+// out     - values produced by the kernel, indexed like in
+// wg_size - number of consecutive elements that form one work-group
+//
+// in is processed in chunks of wg_size elements (the last chunk may be shorter). Within a
+// chunk, out[k] must equal the largest of the chunk's elements up to and including
+// position k. Returns CL_SUCCESS when every element matches; on the first mismatch the
+// position, expected value and actual value are logged and -1 is returned.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // The last work-group may be cut short by the end of the input
+        const size_t group_size = (std::min)(wg_size, in.size() - i);
+        // Start from the smallest representable value so the first element always replaces it
+        CL_INT_TYPE running_max = (std::numeric_limits<CL_INT_TYPE>::min)();
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < group_size; j++)
+        {
+            running_max = (std::max)(running_max, in[i + j]);
+            if (running_max != out[i + j])
+            {
+                // Values are formatted with std::to_string so signed types keep their sign and
+                // 64-bit types are not truncated; the index is widened to match %llu exactly.
+                log_info(
+                    "work_group_scan_inclusive_max %s: Error at %llu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    static_cast<unsigned long long>(i + j),
+                    std::to_string(running_max).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Forwards to the verification routine that matches the compile-time operation op
+// (add, min or max) and returns its result. Any other value of op yields -1.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    if (op == work_group_op::add)
+    {
+        return verify_wg_scan_inclusive_add(in, out, wg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_wg_scan_inclusive_min(in, out, wg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_wg_scan_inclusive_max(in, out, wg_size);
+    }
+    // Not a known work-group operation
+    return -1;
+}
+
+// Runs the inclusive work-group scan with operation op on elements of type CL_INT_TYPE and
+// verifies the device output on the host.
+//
+// device, context, queue - OpenCL objects used to build and run the kernel
+// count                  - requested number of elements; rounded up to a whole number of
+//                          work-groups of the size reported by CL_KERNEL_WORK_GROUP_SIZE
+//
+// Returns CL_SUCCESS when CL_INT_TYPE is not supported by the device (nothing is run) or
+// when the output passes verification. On failure the RETURN_ON_* macros are relied upon
+// to leave the function with the failing code (-1 for a verification failure), as in the
+// rest of this file. Every OpenCL object created here is released on all of those paths.
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // Owns the OpenCL objects of one test run and releases the ones that were created
+    // when the enclosing function is left, whichever return statement is taken.
+    struct cl_objects_guard
+    {
+        cl_mem buffers[2];
+        cl_kernel kernel;
+        cl_program program;
+
+        cl_objects_guard() : kernel(NULL), program(NULL)
+        {
+            buffers[0] = NULL;
+            buffers[1] = NULL;
+        }
+
+        ~cl_objects_guard()
+        {
+            if (buffers[0] != NULL) clReleaseMemObject(buffers[0]);
+            if (buffers[1] != NULL) clReleaseMemObject(buffers[1]);
+            if (kernel != NULL) clReleaseKernel(kernel);
+            if (program != NULL) clReleaseProgram(program);
+        }
+    };
+
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
+    RETURN_ON_ERROR(err)
+#endif
+
+    // The guard takes over the program and kernel only after they were created successfully,
+    // so the handling of a failed create_opencl_kernel call is unchanged.
+    cl_objects_guard objects;
+    objects.program = program;
+    objects.kernel = kernel;
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: count rounded up to a whole number of work-groups
+    // (integer ceil division, no floating point involved)
+    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    objects.buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    objects.buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write: input is fully transferred before the kernel is enqueued
+    err = clEnqueueWriteBuffer(
+        queue, objects.buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(objects.buffers[0]), &objects.buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(objects.buffers[1]), &objects.buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: output holds the kernel results once this call returns
+    err = clEnqueueReadBuffer(
+        queue, objects.buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (verify_wg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+
+    // Buffers, kernel and program are released by the guard's destructor
+    return err;
+}
+
+// Inclusive work-group scan test for the add operation.
+// Runs the cl_int, cl_uint, cl_long and cl_ulong variants one after another; all of
+// them are executed even if an earlier one fails. Returns CL_SUCCESS only when every
+// variant returned CL_SUCCESS, and -1 otherwise.
+AUTO_TEST_CASE(test_work_group_scan_inclusive_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    bool any_failed = false;
+    int status = CL_SUCCESS;
+
+    // 32-bit signed
+    status = work_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 32-bit unsigned
+    status = work_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 64-bit signed
+    status = work_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 64-bit unsigned
+    status = work_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    return any_failed ? -1 : CL_SUCCESS;
+}
+
+// Inclusive work-group scan test for the min operation.
+// Runs the cl_int, cl_uint, cl_long and cl_ulong variants one after another; all of
+// them are executed even if an earlier one fails. Returns CL_SUCCESS only when every
+// variant returned CL_SUCCESS, and -1 otherwise.
+AUTO_TEST_CASE(test_work_group_scan_inclusive_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    bool any_failed = false;
+    int status = CL_SUCCESS;
+
+    // 32-bit signed
+    status = work_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 32-bit unsigned
+    status = work_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 64-bit signed
+    status = work_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 64-bit unsigned
+    status = work_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    return any_failed ? -1 : CL_SUCCESS;
+}
+
+// Inclusive work-group scan test for the max operation.
+// Runs the cl_int, cl_uint, cl_long and cl_ulong variants one after another; all of
+// them are executed even if an earlier one fails. Returns CL_SUCCESS only when every
+// variant returned CL_SUCCESS, and -1 otherwise.
+AUTO_TEST_CASE(test_work_group_scan_inclusive_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    bool any_failed = false;
+    int status = CL_SUCCESS;
+
+    // 32-bit signed
+    status = work_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 32-bit unsigned
+    status = work_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 64-bit signed
+    status = work_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    // 64-bit unsigned
+    status = work_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    any_failed = any_failed || (status != CL_SUCCESS);
+
+    return any_failed ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP