diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt
index 174a9fe7..eb6a6079 100644
--- a/test_conformance/subgroups/CMakeLists.txt
+++ b/test_conformance/subgroups/CMakeLists.txt
@@ -6,6 +6,7 @@ set(${MODULE_NAME}_SOURCES
     test_queries.cpp
     test_workitem.cpp
     test_workgroup.cpp
+    test_ifp.cpp
 )
 
 include(../CMakeCommon.txt)
diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp
index dd53292b..f9a9a9d5 100644
--- a/test_conformance/subgroups/main.cpp
+++ b/test_conformance/subgroups/main.cpp
@@ -23,49 +23,22 @@
 MTdata gMTdata;
 
 test_definition test_list[] = {
-    ADD_TEST(sub_group_info),
-    ADD_TEST(work_item_functions),
-    ADD_TEST(work_group_functions),
-    ADD_TEST(barrier_functions),
+    ADD_TEST_VERSION(sub_group_info_ext, Version(2, 0)),
+    ADD_TEST_VERSION(sub_group_info_core, Version(2, 1)),
+    ADD_TEST_VERSION(work_item_functions_ext, Version(2, 0)),
+    ADD_TEST_VERSION(work_item_functions_core, Version(2, 1)),
+    ADD_TEST_VERSION(work_group_functions_ext, Version(2, 0)),
+    ADD_TEST_VERSION(work_group_functions_core, Version(2, 1)),
+    ADD_TEST_VERSION(barrier_functions_ext, Version(2, 0)),
+    ADD_TEST_VERSION(barrier_functions_core, Version(2, 1)),
+    ADD_TEST_VERSION(ifp_ext, Version(2, 0)),
+    ADD_TEST_VERSION(ifp_core, Version(2, 1))
 };
 
 const int test_num = ARRAY_SIZE(test_list);
 
-static test_status checkSubGroupsExtension(cl_device_id device)
-{
-    // The extension is optional in OpenCL 2.0 (minimum required version) and
-    // required in later versions.
-    auto version = get_device_cl_version(device);
-    auto expected_min_version = Version(2, 0);
-
-    if (version < expected_min_version)
-    {
-        version_expected_info("Test", expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
-        return TEST_SKIP;
-    }
-
-    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
-
-    if ((version == expected_min_version) && !hasExtension)
-    {
-        log_info(
-            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
-        return TEST_SKIP;
-    }
-
-    if ((version > expected_min_version) && !hasExtension)
-    {
-        log_error("'cl_khr_subgroups' is a required extension, failing.\n");
-        return TEST_FAIL;
-    }
-
-    return TEST_PASS;
-}
-
 static test_status InitCL(cl_device_id device)
 {
-
     auto version = get_device_cl_version(device);
     test_status ret = TEST_PASS;
     if (version >= Version(3, 0))
@@ -86,10 +59,6 @@ static test_status InitCL(cl_device_id device)
             ret = TEST_SKIP;
         }
     }
-    else
-    {
-        ret = checkSubGroupsExtension(device);
-    }
     return ret;
 }
 
diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h
index e3713770..3ebb13b5 100644
--- a/test_conformance/subgroups/procs.h
+++ b/test_conformance/subgroups/procs.h
@@ -26,15 +26,35 @@
 
 extern MTdata gMTdata;
 
-extern int test_sub_group_info(cl_device_id device, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_work_item_functions(cl_device_id device, cl_context context,
+extern int test_sub_group_info_ext(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+extern int test_sub_group_info_core(cl_device_id device, cl_context context,
                                     cl_command_queue queue, int num_elements);
-extern int test_work_group_functions(cl_device_id device, cl_context context,
-                                     cl_command_queue queue, int num_elements);
-extern int test_barrier_functions(cl_device_id device, cl_context context,
-                                  cl_command_queue queue, int num_elements);
+extern int test_work_item_functions_ext(cl_device_id device, cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements);
+extern int test_work_item_functions_core(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
+extern int test_work_group_functions_ext(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
+extern int test_work_group_functions_core(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_barrier_functions_ext(cl_device_id device, cl_context context,
+                                      cl_command_queue queue, int num_elements);
+extern int test_barrier_functions_core(cl_device_id device, cl_context context,
+                                       cl_command_queue queue,
+                                       int num_elements);
 extern int test_pipe_functions(cl_device_id device, cl_context context,
                                cl_command_queue queue, int num_elements);
+extern int test_ifp_ext(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements);
+extern int test_ifp_core(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements);
 
 #endif /*_procs_h*/
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index 9a57e466..6e84ccb3 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -23,6 +23,39 @@
 #include <limits>
 #include <vector>
 
+class subgroupsAPI {
+public:
+    subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups)
+    {
+        static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
+                          == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
+                      "Enums have to be the same");
+        static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
+                          == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
+                      "Enums have to be the same");
+        if (useCoreSubgroups)
+        {
+            _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo;
+            clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo";
+        }
+        else
+        {
+            _clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn)
+                clGetExtensionFunctionAddressForPlatform(
+                    platform, "clGetKernelSubGroupInfoKHR");
+            clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR";
+        }
+    }
+    clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr()
+    {
+        return _clGetKernelSubGroupInfo_ptr;
+    }
+    const char *clGetKernelSubGroupInfo_name;
+
+private:
+    clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr;
+};
+
 // Some template helpers
 template <typename Ty> struct TypeName;
 template <> struct TypeName<cl_half>
@@ -310,7 +343,7 @@ struct test
 {
     static int run(cl_device_id device, cl_context context,
                    cl_command_queue queue, int num_elements, const char *kname,
-                   const char *src, int dynscl = 0)
+                   const char *src, int dynscl, bool useCoreSubgroups)
     {
         size_t tmp;
         int error;
@@ -318,7 +351,6 @@ struct test
         size_t realSize;
         size_t global;
         size_t local;
-        const char *kstrings[3];
         clProgramWrapper program;
         clKernelWrapper kernel;
         cl_platform_id platform;
@@ -332,14 +364,21 @@ struct test
         error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
                                 (void *)&platform, NULL);
         test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
+        std::stringstream kernel_sstr;
+        if (useCoreSubgroups)
+        {
+            kernel_sstr
+                << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
+        }
+        kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
+                       "M[I].y = get_sub_group_id();\n";
+        kernel_sstr << TypeDef<Ty>::val();
+        kernel_sstr << src;
+        const std::string &kernel_str = kernel_sstr.str();
+        const char *kernel_src = kernel_str.c_str();
 
-        kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
-                      "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
-                      "M[I].y = get_sub_group_id();\n";
-        kstrings[1] = TypeDef<Ty>::val();
-        kstrings[2] = src;
         error = create_single_kernel_helper_with_build_options(
-            context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0");
+            context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0");
         if (error != 0) return error;
 
         // Determine some local dimensions to use for the test.
@@ -352,40 +391,46 @@ struct test
         if (local > LSIZE) local = LSIZE;
 
         // Get the sub group info
-        clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
-        clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                platform, "clGetKernelSubGroupInfoKHR");
-        if (clGetKernelSubGroupInfoKHR_ptr == NULL)
+        subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
+        clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
+            subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
+        if (clGetKernelSubGroupInfo_ptr == NULL)
         {
-            log_error(
-                "ERROR: clGetKernelSubGroupInfoKHR function not available");
-            return -1;
+            log_error("ERROR: %s function not available",
+                      subgroupsApiSet.clGetKernelSubGroupInfo_name);
+            return TEST_FAIL;
+        }
+        error = clGetKernelSubGroupInfo_ptr(
+            kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+            sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: %s function error for "
+                      "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
+                      subgroupsApiSet.clGetKernelSubGroupInfo_name);
+            return TEST_FAIL;
         }
 
-        error = clGetKernelSubGroupInfoKHR_ptr(
-            kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
-            sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
-        test_error(error,
-                   "clGetKernelSubGroupInfoKHR failed for "
-                   "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
         subgroup_size = (int)tmp;
 
-        error = clGetKernelSubGroupInfoKHR_ptr(
-            kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
+        error = clGetKernelSubGroupInfo_ptr(
+            kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
             sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
-        test_error(error,
-                   "clGetKernelSubGroupInfoKHR failed for "
-                   "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
-        num_subgroups = (int)tmp;
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: %s function error for "
+                      "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
+                      subgroupsApiSet.clGetKernelSubGroupInfo_name);
+            return TEST_FAIL;
+        }
 
+        num_subgroups = (int)tmp;
         // Make sure the number of sub groups is what we expect
         if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
         {
-            log_error("ERROR: unexpected number of subgroups (%d) returned by "
-                      "clGetKernelSubGroupInfoKHR\n",
+            log_error("ERROR: unexpected number of subgroups (%d) returned\n",
                       num_subgroups);
-            return -1;
+            return TEST_FAIL;
         }
 
         std::vector<Ty> idata;
diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp
index b85f4d81..e6ce1d2e 100644
--- a/test_conformance/subgroups/test_barrier.cpp
+++ b/test_conformance/subgroups/test_barrier.cpp
@@ -139,10 +139,10 @@ template <int Which> struct BAR
     }
 };
 
-
 // Entry point from main
 int test_barrier_functions(cl_device_id device, cl_context context,
-                           cl_command_queue queue, int num_elements)
+                           cl_command_queue queue, int num_elements,
+                           bool useCoreSubgroups)
 {
     int error;
 
@@ -150,10 +150,33 @@ int test_barrier_functions(cl_device_id device, cl_context context,
 #define G 2000
 #define L 200
 
-    error = test<cl_int, BAR<0>, G, L>::run(
-        device, context, queue, num_elements, "test_lbar", lbar_source);
+    error = test<cl_int, BAR<0>, G, L>::run(device, context, queue,
+                                            num_elements, "test_lbar",
+                                            lbar_source, 0, useCoreSubgroups);
     error = test<cl_int, BAR<1>, G, L, G>::run(
-        device, context, queue, num_elements, "test_gbar", gbar_source);
+        device, context, queue, num_elements, "test_gbar", gbar_source, 0,
+        useCoreSubgroups);
 
     return error;
 }
+
+int test_barrier_functions_core(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements)
+{
+    return test_barrier_functions(device, context, queue, num_elements, true);
+}
+
+int test_barrier_functions_ext(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int num_elements)
+{
+    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
+
+    if (!hasExtension)
+    {
+        log_info(
+            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return test_barrier_functions(device, context, queue, num_elements, false);
+}
\ No newline at end of file
diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp
new file mode 100644
index 00000000..02850e5f
--- /dev/null
+++ b/test_conformance/subgroups/test_ifp.cpp
@@ -0,0 +1,364 @@
+//
+// Copyright (c) 2020 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "subhelpers.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+
+
+// These need to stay in sync with the kernel source below
+#define NUM_LOC 49
+#define INST_LOC_MASK 0x7f
+#define INST_OP_SHIFT 0
+#define INST_OP_MASK 0xf
+#define INST_LOC_SHIFT 4
+#define INST_VAL_SHIFT 12
+#define INST_VAL_MASK 0x7ffff
+#define INST_END 0x0
+#define INST_STORE 0x1
+#define INST_WAIT 0x2
+#define INST_COUNT 0x3
+
+static const char *ifp_source =
+    "#define NUM_LOC 49\n"
+    "#define INST_LOC_MASK 0x7f\n"
+    "#define INST_OP_SHIFT 0\n"
+    "#define INST_OP_MASK 0xf\n"
+    "#define INST_LOC_SHIFT 4\n"
+    "#define INST_VAL_SHIFT 12\n"
+    "#define INST_VAL_MASK 0x7ffff\n"
+    "#define INST_END 0x0\n"
+    "#define INST_STORE 0x1\n"
+    "#define INST_WAIT 0x2\n"
+    "#define INST_COUNT 0x3\n"
+    "\n"
+    "__kernel void\n"
+    "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
+    "{\n"
+    "    __local atomic_int loc[NUM_LOC];\n"
+    "\n"
+    "    // Don't run if there is only one sub group\n"
+    "    if (get_num_sub_groups() == 1)\n"
+    "        return;\n"
+    "\n"
+    "    // First initialize loc[]\n"
+    "    int lid = (int)get_local_id(0);\n"
+    "\n"
+    "    if (lid < NUM_LOC)\n"
+    "        atomic_init(loc+lid, 0);\n"
+    "\n"
+    "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+    "\n"
+    "    // Compute pointer to this sub group's \"instructions\"\n"
+    "    const __global int *pc = in +\n"
+    "        ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
+    "         (int)get_sub_group_id()) *\n"
+    "        (NUM_LOC+1);\n"
+    "\n"
+    "    // Set up to \"run\"\n"
+    "    bool ok = (int)get_sub_group_local_id() == 0;\n"
+    "    bool run = true;\n"
+    "\n"
+    "    while (run) {\n"
+    "        int inst = *pc++;\n"
+    "        int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
+    "        int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
+    "        int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
+    "\n"
+    "        switch (iop) {\n"
+    "        case INST_STORE:\n"
+    "            if (ok)\n"
+    "                atomic_store(loc+iloc, ival);\n"
+    "            break;\n"
+    "        case INST_WAIT:\n"
+    "            if (ok) {\n"
+    "                while (atomic_load(loc+iloc) != ival)\n"
+    "                    ;\n"
+    "            }\n"
+    "            break;\n"
+    "        case INST_COUNT:\n"
+    "            if (ok) {\n"
+    "                int i;\n"
+    "                for (i=0;i<ival;++i)\n"
+    "                    atomic_fetch_add(loc+iloc, 1);\n"
+    "            }\n"
+    "            break;\n"
+    "        case INST_END:\n"
+    "            run = false;\n"
+    "            break;\n"
+    "        }\n"
+    "\n"
+    "        sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+    "    }\n"
+    "\n"
+    "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+    "\n"
+    "    // Save this group's result\n"
+    "    __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
+    "    if (lid < NUM_LOC)\n"
+    "        op[lid] = atomic_load(loc+lid);\n"
+    "}\n";
+
+
+// Independent forward progress stuff
+// Note:
+//   Output needs num_groups * NUM_LOC elements
+//   local_size must be > NUM_LOC
+//   Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
+
+static inline int inst(int op, int loc, int val)
+{
+    return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
+        | (op << INST_OP_SHIFT);
+}
+
+void gen_insts(cl_int *x, cl_int *p, int n)
+{
+    int i, j0, j1;
+    int val;
+    int ii[NUM_LOC];
+
+    // Create a random permutation of 0...NUM_LOC-1
+    ii[0] = 0;
+    for (i = 1; i < NUM_LOC; ++i)
+    {
+        j0 = random_in_range(0, i, gMTdata);
+        if (j0 != i) ii[i] = ii[j0];
+        ii[j0] = i;
+    }
+
+    // Initialize "instruction pointers"
+    memset(p, 0, n * 4);
+
+    for (i = 0; i < NUM_LOC; ++i)
+    {
+        // Randomly choose 2 different sub groups
+        // One does a random amount of work, and the other waits for it
+        j0 = random_in_range(0, n - 1, gMTdata);
+
+        do
+        {
+            j1 = random_in_range(0, n - 1, gMTdata);
+        } while (j1 == j0);
+
+        // Randomly choose a wait value and assign "instructions"
+        val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
+        x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
+        x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
+        ++p[j0];
+        ++p[j1];
+    }
+
+    // Last "inst" for each sub group is END
+    for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
+}
+
+// Execute one group's "instructions"
+void run_insts(cl_int *x, cl_int *p, int n)
+{
+    int i, nend;
+    bool scont;
+    cl_int loc[NUM_LOC];
+
+    // Initialize result and "instruction pointers"
+    memset(loc, 0, sizeof(loc));
+    memset(p, 0, 4 * n);
+
+    // Repetitively loop over subgroups with each executing "instructions" until
+    // blocked The loop terminates when all subgroups have hit the "END
+    // instruction"
+    do
+    {
+        nend = 0;
+        for (i = 0; i < n; ++i)
+        {
+            do
+            {
+                cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
+                cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
+                cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
+                cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
+                scont = false;
+
+                switch (iop)
+                {
+                    case INST_STORE:
+                        loc[iloc] = ival;
+                        ++p[i];
+                        scont = true;
+                        break;
+                    case INST_WAIT:
+                        if (loc[iloc] == ival)
+                        {
+                            ++p[i];
+                            scont = true;
+                        }
+                        break;
+                    case INST_COUNT:
+                        loc[iloc] += ival;
+                        ++p[i];
+                        scont = true;
+                        break;
+                    case INST_END: ++nend; break;
+                }
+            } while (scont);
+        }
+    } while (nend < n);
+
+    // Return result, reusing "p"
+    memcpy(p, loc, sizeof(loc));
+}
+
+
+struct IFP
+{
+    static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
+    {
+        int k;
+        int nj = (nw + ns - 1) / ns;
+
+        // We need at least 2 sub groups per group for this test
+        if (nj == 1) return;
+
+        for (k = 0; k < ng; ++k)
+        {
+            gen_insts(x, t, nj);
+            x += nj * (NUM_LOC + 1);
+        }
+    }
+
+    static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
+                   int nw, int ng)
+    {
+        int i, k;
+        int nj = (nw + ns - 1) / ns;
+
+        // We need at least 2 sub groups per group for this tes
+        if (nj == 1) return 0;
+
+        log_info("  independent forward progress...\n");
+
+        for (k = 0; k < ng; ++k)
+        {
+            run_insts(x, t, nj);
+            for (i = 0; i < NUM_LOC; ++i)
+            {
+                if (t[i] != y[i])
+                {
+                    log_error(
+                        "ERROR: mismatch at element %d in work group %d\n", i,
+                        k);
+                    return -1;
+                }
+            }
+            x += nj * (NUM_LOC + 1);
+            y += NUM_LOC;
+        }
+
+        return 0;
+    }
+};
+
+int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
+             int num_elements, bool useCoreSubgroups)
+{
+    int error;
+
+    // Adjust these individually below if desired/needed
+#define G 2000
+#define L 200
+    error = test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
+                                         "test_ifp", ifp_source, NUM_LOC + 1,
+                                         useCoreSubgroups);
+    return error;
+}
+
+static test_status checkIFPSupport(cl_device_id device, bool &ifpSupport)
+{
+    cl_uint ifp_supported;
+    cl_uint error;
+    error = clGetDeviceInfo(device,
+                            CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
+                            sizeof(ifp_supported), &ifp_supported, NULL);
+    if (error != CL_SUCCESS)
+    {
+        print_error(
+            error,
+            "Unable to get CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS "
+            "capability");
+        return TEST_FAIL;
+    }
+    // skip testing ifp
+    if (ifp_supported != 1)
+    {
+        log_info("INDEPENDENT FORWARD PROGRESS not supported...\n");
+        ifpSupport = false;
+    }
+    else
+    {
+        log_info("INDEPENDENT FORWARD PROGRESS supported...\n");
+        ifpSupport = true;
+    }
+    return TEST_PASS;
+}
+
+int test_ifp_core(cl_device_id device, cl_context context,
+                  cl_command_queue queue, int num_elements)
+{
+    bool ifpSupport = true;
+    test_status error;
+    error = checkIFPSupport(device, ifpSupport);
+    if (error != TEST_PASS)
+    {
+        return error;
+    }
+    if (ifpSupport == false)
+    {
+        log_info("Independed forward progress skipped.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return test_ifp(device, context, queue, num_elements, true);
+}
+
+int test_ifp_ext(cl_device_id device, cl_context context,
+                 cl_command_queue queue, int num_elements)
+{
+    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
+    bool ifpSupport = true;
+
+    if (!hasExtension)
+    {
+        log_info(
+            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+    // ifp only in subgroup functions tests:
+    test_status error;
+    error = checkIFPSupport(device, ifpSupport);
+    if (error != TEST_PASS)
+    {
+        return error;
+    }
+    if (ifpSupport == false)
+    {
+        log_info(
+            "Error reason: the extension cl_khr_subgroups requires that "
+            "Independed forward progress has to be supported by device.\n");
+        return TEST_FAIL;
+    }
+    return test_ifp(device, context, queue, num_elements, false);
+}
\ No newline at end of file
diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp
index 79929295..2ad3d7fa 100644
--- a/test_conformance/subgroups/test_queries.cpp
+++ b/test_conformance/subgroups/test_queries.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "procs.h"
+#include "subhelpers.h"
 
 typedef struct
 {
@@ -21,23 +22,10 @@ typedef struct
     cl_uint numSubGroups;
 } result_data;
 
-static const char *query_kernel_source =
-    "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
-    "\n"
-    "typedef struct {\n"
-    "    uint maxSubGroupSize;\n"
-    "    uint numSubGroups;\n"
-    "} result_data;\n"
-    "\n"
-    "__kernel void query_kernel( __global result_data *outData )\n"
-    "{\n"
-    "    int gid = get_global_id( 0 );\n"
-    "    outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
-    "    outData[gid].numSubGroups = get_num_sub_groups();\n"
-    "}";
 
 int test_sub_group_info(cl_device_id device, cl_context context,
-                        cl_command_queue queue, int num_elements)
+                        cl_command_queue queue, int num_elements,
+                        bool useCoreSubgroups)
 {
     static const size_t gsize0 = 80;
     int i, error;
@@ -58,9 +46,29 @@ int test_sub_group_info(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper kernel;
     clMemWrapper out;
+    std::stringstream kernel_sstr;
+    if (useCoreSubgroups)
+    {
+        kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
+    }
+    kernel_sstr
+        << "\n"
+           "typedef struct {\n"
+           "    uint maxSubGroupSize;\n"
+           "    uint numSubGroups;\n"
+           "} result_data;\n"
+           "\n"
+           "__kernel void query_kernel( __global result_data *outData )\n"
+           "{\n"
+           "    int gid = get_global_id( 0 );\n"
+           "    outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
+           "    outData[gid].numSubGroups = get_num_sub_groups();\n"
+           "}";
 
+    const std::string &kernel_str = kernel_sstr.str();
+    const char *kernel_src = kernel_str.c_str();
     error = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &query_kernel_source, "query_kernel",
+        context, &program, &kernel, 1, &kernel_src, "query_kernel",
         "-cl-std=CL2.0");
     if (error != 0) return error;
 
@@ -88,44 +96,50 @@ int test_sub_group_info(cl_device_id device, cl_context context,
                             (void *)&platform, NULL);
     test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
 
-    clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
-    clGetKernelSubGroupInfoKHR_ptr =
-        (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(
-            platform, "clGetKernelSubGroupInfoKHR");
-    if (clGetKernelSubGroupInfoKHR_ptr == NULL)
+    subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
+    clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
+        subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
+    if (clGetKernelSubGroupInfo_ptr == NULL)
     {
-        log_error("ERROR: clGetKernelSubGroupInfoKHR function not available");
-        return -1;
+        log_error("ERROR: %s function not available",
+                  subgroupsApiSet.clGetKernelSubGroupInfo_name);
+        return TEST_FAIL;
     }
 
-    error = clGetKernelSubGroupInfoKHR_ptr(
-        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
-        sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size),
+    error = clGetKernelSubGroupInfo_ptr(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(local),
+        (void *)&local, sizeof(kernel_max_subgroup_size),
         (void *)&kernel_max_subgroup_size, &realSize);
-    test_error(error,
-               "clGetKernelSubGroupInfoKHR failed for "
-               "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
-    log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel "
-             "is %d.\n",
-             (int)kernel_max_subgroup_size);
-
+    if (error != CL_SUCCESS)
+    {
+        log_error("ERROR: %s function error for "
+                  "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
+                  subgroupsApiSet.clGetKernelSubGroupInfo_name);
+        return TEST_FAIL;
+    }
+    log_info(
+        "The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n",
+        (int)kernel_max_subgroup_size);
     if (realSize != sizeof(kernel_max_subgroup_size))
     {
         log_error("ERROR: Returned size of max sub group size not valid! "
                   "(Expected %d, got %d)\n",
                   (int)sizeof(kernel_max_subgroup_size), (int)realSize);
-        return -1;
+        return TEST_FAIL;
     }
-
-    error = clGetKernelSubGroupInfoKHR_ptr(
-        kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
-        sizeof(local), (void *)&local, sizeof(kernel_subgroup_count),
+    error = clGetKernelSubGroupInfo_ptr(
+        kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(local),
+        (void *)&local, sizeof(kernel_subgroup_count),
         (void *)&kernel_subgroup_count, &realSize);
-    test_error(error,
-               "clGetKernelSubGroupInfoKHR failed for "
-               "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
+    if (error != CL_SUCCESS)
+    {
+        log_error("ERROR: %s function error "
+                  "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
+                  subgroupsApiSet.clGetKernelSubGroupInfo_name);
+        return TEST_FAIL;
+    }
     log_info(
-        "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n",
+        "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n",
         (int)kernel_subgroup_count);
 
     if (realSize != sizeof(kernel_subgroup_count))
@@ -133,7 +147,7 @@ int test_sub_group_info(cl_device_id device, cl_context context,
         log_error("ERROR: Returned size of sub group count not valid! "
                   "(Expected %d, got %d)\n",
                   (int)sizeof(kernel_subgroup_count), (int)realSize);
-        return -1;
+        return TEST_FAIL;
     }
 
     // Verify that the kernel gets the same max_subgroup_size and subgroup_count
@@ -176,3 +190,24 @@ int test_sub_group_info(cl_device_id device, cl_context context,
 
     return 0;
 }
+
+int test_sub_group_info_core(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements)
+{
+    return test_sub_group_info(device, context, queue, num_elements, true);
+}
+
+int test_sub_group_info_ext(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int num_elements)
+{
+    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
+
+    if (!hasExtension)
+    {
+        log_info(
+            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return test_sub_group_info(device, context, queue, num_elements, false);
+}
\ No newline at end of file
diff --git a/test_conformance/subgroups/test_workgroup.cpp b/test_conformance/subgroups/test_workgroup.cpp
index 572220f7..bc9d6190 100644
--- a/test_conformance/subgroups/test_workgroup.cpp
+++ b/test_conformance/subgroups/test_workgroup.cpp
@@ -126,98 +126,6 @@ static const char *scexmin_source =
     "    out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
     "}\n";
 
-// These need to stay in sync with the kernel source below
-#define NUM_LOC 49
-#define INST_LOC_MASK 0x7f
-#define INST_OP_SHIFT 0
-#define INST_OP_MASK 0xf
-#define INST_LOC_SHIFT 4
-#define INST_VAL_SHIFT 12
-#define INST_VAL_MASK 0x7ffff
-#define INST_END 0x0
-#define INST_STORE 0x1
-#define INST_WAIT 0x2
-#define INST_COUNT 0x3
-
-static const char *ifp_source =
-    "#define NUM_LOC 49\n"
-    "#define INST_LOC_MASK 0x7f\n"
-    "#define INST_OP_SHIFT 0\n"
-    "#define INST_OP_MASK 0xf\n"
-    "#define INST_LOC_SHIFT 4\n"
-    "#define INST_VAL_SHIFT 12\n"
-    "#define INST_VAL_MASK 0x7ffff\n"
-    "#define INST_END 0x0\n"
-    "#define INST_STORE 0x1\n"
-    "#define INST_WAIT 0x2\n"
-    "#define INST_COUNT 0x3\n"
-    "\n"
-    "__kernel void\n"
-    "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
-    "{\n"
-    "    __local atomic_int loc[NUM_LOC];\n"
-    "\n"
-    "    // Don't run if there is only one sub group\n"
-    "    if (get_num_sub_groups() == 1)\n"
-    "        return;\n"
-    "\n"
-    "    // First initialize loc[]\n"
-    "    int lid = (int)get_local_id(0);\n"
-    "\n"
-    "    if (lid < NUM_LOC)\n"
-    "        atomic_init(loc+lid, 0);\n"
-    "\n"
-    "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
-    "\n"
-    "    // Compute pointer to this sub group's \"instructions\"\n"
-    "    const __global int *pc = in +\n"
-    "        ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
-    "         (int)get_sub_group_id()) *\n"
-    "        (NUM_LOC+1);\n"
-    "\n"
-    "    // Set up to \"run\"\n"
-    "    bool ok = (int)get_sub_group_local_id() == 0;\n"
-    "    bool run = true;\n"
-    "\n"
-    "    while (run) {\n"
-    "        int inst = *pc++;\n"
-    "        int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
-    "        int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
-    "        int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
-    "\n"
-    "        switch (iop) {\n"
-    "        case INST_STORE:\n"
-    "            if (ok)\n"
-    "                atomic_store(loc+iloc, ival);\n"
-    "            break;\n"
-    "        case INST_WAIT:\n"
-    "            if (ok) {\n"
-    "                while (atomic_load(loc+iloc) != ival)\n"
-    "                    ;\n"
-    "            }\n"
-    "            break;\n"
-    "        case INST_COUNT:\n"
-    "            if (ok) {\n"
-    "                int i;\n"
-    "                for (i=0;i<ival;++i)\n"
-    "                    atomic_fetch_add(loc+iloc, 1);\n"
-    "            }\n"
-    "            break;\n"
-    "        case INST_END:\n"
-    "            run = false;\n"
-    "            break;\n"
-    "        }\n"
-    "\n"
-    "        sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
-    "    }\n"
-    "\n"
-    "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
-    "\n"
-    "    // Save this group's result\n"
-    "    __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
-    "    if (lid < NUM_LOC)\n"
-    "        op[lid] = atomic_load(loc+lid);\n"
-    "}\n";
 
 // Any/All test functions
 template <int Which> struct AA
@@ -713,169 +621,11 @@ template <typename Ty> struct BC
     }
 };
 
-// Independent forward progress stuff
-// Note:
-//   Output needs num_groups * NUM_LOC elements
-//   local_size must be > NUM_LOC
-//   Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
-
-static inline int inst(int op, int loc, int val)
-{
-    return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
-        | (op << INST_OP_SHIFT);
-}
-
-void gen_insts(cl_int *x, cl_int *p, int n)
-{
-    int i, j0, j1;
-    int val;
-    int ii[NUM_LOC];
-
-    // Create a random permutation of 0...NUM_LOC-1
-    ii[0] = 0;
-    for (i = 1; i < NUM_LOC; ++i)
-    {
-        j0 = random_in_range(0, i, gMTdata);
-        if (j0 != i) ii[i] = ii[j0];
-        ii[j0] = i;
-    }
-
-    // Initialize "instruction pointers"
-    memset(p, 0, n * 4);
-
-    for (i = 0; i < NUM_LOC; ++i)
-    {
-        // Randomly choose 2 different sub groups
-        // One does a random amount of work, and the other waits for it
-        j0 = random_in_range(0, n - 1, gMTdata);
-
-        do
-        {
-            j1 = random_in_range(0, n - 1, gMTdata);
-        } while (j1 == j0);
-
-        // Randomly choose a wait value and assign "instructions"
-        val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
-        x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
-        x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
-        ++p[j0];
-        ++p[j1];
-    }
-
-    // Last "inst" for each sub group is END
-    for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
-}
-
-// Execute one group's "instructions"
-void run_insts(cl_int *x, cl_int *p, int n)
-{
-    int i, nend;
-    bool scont;
-    cl_int loc[NUM_LOC];
-
-    // Initialize result and "instruction pointers"
-    memset(loc, 0, sizeof(loc));
-    memset(p, 0, 4 * n);
-
-    // Repetitively loop over subgroups with each executing "instructions" until
-    // blocked The loop terminates when all subgroups have hit the "END
-    // instruction"
-    do
-    {
-        nend = 0;
-        for (i = 0; i < n; ++i)
-        {
-            do
-            {
-                cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
-                cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
-                cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
-                cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
-                scont = false;
-
-                switch (iop)
-                {
-                    case INST_STORE:
-                        loc[iloc] = ival;
-                        ++p[i];
-                        scont = true;
-                        break;
-                    case INST_WAIT:
-                        if (loc[iloc] == ival)
-                        {
-                            ++p[i];
-                            scont = true;
-                        }
-                        break;
-                    case INST_COUNT:
-                        loc[iloc] += ival;
-                        ++p[i];
-                        scont = true;
-                        break;
-                    case INST_END: ++nend; break;
-                }
-            } while (scont);
-        }
-    } while (nend < n);
-
-    // Return result, reusing "p"
-    memcpy(p, loc, sizeof(loc));
-}
-
-
-struct IFP
-{
-    static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
-    {
-        int k;
-        int nj = (nw + ns - 1) / ns;
-
-        // We need at least 2 sub groups per group for this test
-        if (nj == 1) return;
-
-        for (k = 0; k < ng; ++k)
-        {
-            gen_insts(x, t, nj);
-            x += nj * (NUM_LOC + 1);
-        }
-    }
-
-    static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
-                   int nw, int ng)
-    {
-        int i, k;
-        int nj = (nw + ns - 1) / ns;
-
-        // We need at least 2 sub groups per group for this tes
-        if (nj == 1) return 0;
-
-        log_info("  independent forward progress...\n");
-
-        for (k = 0; k < ng; ++k)
-        {
-            run_insts(x, t, nj);
-            for (i = 0; i < NUM_LOC; ++i)
-            {
-                if (t[i] != y[i])
-                {
-                    log_error(
-                        "ERROR: mismatch at element %d in work group %d\n", i,
-                        k);
-                    return -1;
-                }
-            }
-            x += nj * (NUM_LOC + 1);
-            y += NUM_LOC;
-        }
-
-        return 0;
-    }
-};
-
 
 // Entry point from main
 int test_work_group_functions(cl_device_id device, cl_context context,
-                              cl_command_queue queue, int num_elements)
+                              cl_command_queue queue, int num_elements,
+                              bool useCoreSubgroups)
 {
     int error;
 
@@ -884,161 +634,242 @@ int test_work_group_functions(cl_device_id device, cl_context context,
 #define L 200
 
     error = test<int, AA<0>, G, L>::run(device, context, queue, num_elements,
-                                        "test_any", any_source);
+                                        "test_any", any_source, 0,
+                                        useCoreSubgroups);
     error |= test<int, AA<1>, G, L>::run(device, context, queue, num_elements,
-                                         "test_all", all_source);
+                                         "test_all", all_source, 0,
+                                         useCoreSubgroups);
 
     // error |= test<cl_half, BC<cl_half>, G, L>::run(device, context, queue,
     // num_elements, "test_bcast", bcast_source);
     error |= test<cl_uint, BC<cl_uint>, G, L>::run(
-        device, context, queue, num_elements, "test_bcast", bcast_source);
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, BC<cl_int>, G, L>::run(
-        device, context, queue, num_elements, "test_bcast", bcast_source);
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, BC<cl_ulong>, G, L>::run(
-        device, context, queue, num_elements, "test_bcast", bcast_source);
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, BC<cl_long>, G, L>::run(
-        device, context, queue, num_elements, "test_bcast", bcast_source);
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
     error |= test<float, BC<float>, G, L>::run(
-        device, context, queue, num_elements, "test_bcast", bcast_source);
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
     error |= test<double, BC<double>, G, L>::run(
-        device, context, queue, num_elements, "test_bcast", bcast_source);
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, RED<cl_half,0>, G, L>::run(device, context, queue,
     // num_elements, "test_redadd", redadd_source);
     error |= test<cl_uint, RED<cl_uint, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_redadd", redadd_source);
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, RED<cl_int, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_redadd", redadd_source);
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, RED<cl_ulong, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_redadd", redadd_source);
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, RED<cl_long, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_redadd", redadd_source);
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
     error |= test<float, RED<float, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_redadd", redadd_source);
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
     error |= test<double, RED<double, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_redadd", redadd_source);
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, RED<cl_half,1>, G, L>::run(device, context, queue,
     // num_elements, "test_redmax", redmax_source);
     error |= test<cl_uint, RED<cl_uint, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_redmax", redmax_source);
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, RED<cl_int, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_redmax", redmax_source);
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, RED<cl_ulong, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_redmax", redmax_source);
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, RED<cl_long, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_redmax", redmax_source);
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
     error |= test<float, RED<float, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_redmax", redmax_source);
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
     error |= test<double, RED<double, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_redmax", redmax_source);
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, RED<cl_half,2>, G, L>::run(device, context, queue,
     // num_elements, "test_redmin", redmin_source);
     error |= test<cl_uint, RED<cl_uint, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_redmin", redmin_source);
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, RED<cl_int, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_redmin", redmin_source);
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, RED<cl_ulong, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_redmin", redmin_source);
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, RED<cl_long, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_redmin", redmin_source);
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
     error |= test<float, RED<float, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_redmin", redmin_source);
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
     error |= test<double, RED<double, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_redmin", redmin_source);
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, SCIN<cl_half,0>, G, L>::run(device, context,
     // queue, num_elements, "test_scinadd", scinadd_source);
     error |= test<cl_uint, SCIN<cl_uint, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scinadd", scinadd_source);
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, SCIN<cl_int, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scinadd", scinadd_source);
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, SCIN<cl_ulong, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scinadd", scinadd_source);
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, SCIN<cl_long, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scinadd", scinadd_source);
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
     error |= test<float, SCIN<float, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scinadd", scinadd_source);
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
     error |= test<double, SCIN<double, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scinadd", scinadd_source);
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, SCIN<cl_half,1>, G, L>::run(device, context,
     // queue, num_elements, "test_scinmax", scinmax_source);
     error |= test<cl_uint, SCIN<cl_uint, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmax", scinmax_source);
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, SCIN<cl_int, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmax", scinmax_source);
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, SCIN<cl_ulong, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmax", scinmax_source);
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, SCIN<cl_long, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmax", scinmax_source);
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
     error |= test<float, SCIN<float, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmax", scinmax_source);
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
     error |= test<double, SCIN<double, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmax", scinmax_source);
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, SCIN<cl_half,2>, G, L>::run(device, context,
     // queue, num_elements, "test_scinmin", scinmin_source);
     error |= test<cl_uint, SCIN<cl_uint, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmin", scinmin_source);
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, SCIN<cl_int, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmin", scinmin_source);
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, SCIN<cl_ulong, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmin", scinmin_source);
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, SCIN<cl_long, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmin", scinmin_source);
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
     error |= test<float, SCIN<float, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmin", scinmin_source);
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
     error |= test<double, SCIN<double, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scinmin", scinmin_source);
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, SCEX<cl_half,0>, G, L>::run(device, context,
     // queue, num_elements, "test_scexadd", scexadd_source);
     error |= test<cl_uint, SCEX<cl_uint, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scexadd", scexadd_source);
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, SCEX<cl_int, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scexadd", scexadd_source);
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, SCEX<cl_ulong, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scexadd", scexadd_source);
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, SCEX<cl_long, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scexadd", scexadd_source);
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
     error |= test<float, SCEX<float, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scexadd", scexadd_source);
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
     error |= test<double, SCEX<double, 0>, G, L>::run(
-        device, context, queue, num_elements, "test_scexadd", scexadd_source);
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, SCEX<cl_half,1>, G, L>::run(device, context,
     // queue, num_elements, "test_scexmax", scexmax_source);
     error |= test<cl_uint, SCEX<cl_uint, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmax", scexmax_source);
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, SCEX<cl_int, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmax", scexmax_source);
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, SCEX<cl_ulong, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmax", scexmax_source);
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, SCEX<cl_long, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmax", scexmax_source);
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
     error |= test<float, SCEX<float, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmax", scexmax_source);
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
     error |= test<double, SCEX<double, 1>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmax", scexmax_source);
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
 
     // error |= test<cl_half, SCEX<cl_half,2>, G, L>::run(device, context,
     // queue, num_elements, "test_scexmin", scexmin_source);
     error |= test<cl_uint, SCEX<cl_uint, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmin", scexmin_source);
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_int, SCEX<cl_int, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmin", scexmin_source);
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_ulong, SCEX<cl_ulong, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmin", scexmin_source);
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
     error |= test<cl_long, SCEX<cl_long, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmin", scexmin_source);
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
     error |= test<float, SCEX<float, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmin", scexmin_source);
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
     error |= test<double, SCEX<double, 2>, G, L>::run(
-        device, context, queue, num_elements, "test_scexmin", scexmin_source);
-
-    error |= test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
-                                          "test_ifp", ifp_source, NUM_LOC + 1);
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
     return error;
 }
+
+int test_work_group_functions_core(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+{
+    return test_work_group_functions(device, context, queue, num_elements,
+                                     true);
+}
+
+int test_work_group_functions_ext(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
+
+    if (!hasExtension)
+    {
+        log_info(
+            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+    return test_work_group_functions(device, context, queue, num_elements,
+                                     false);
+}
diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp
index 125ad9e9..b77bfe1a 100644
--- a/test_conformance/subgroups/test_workitem.cpp
+++ b/test_conformance/subgroups/test_workitem.cpp
@@ -34,29 +34,6 @@ struct get_test_data
     }
 };
 
-static const char *get_test_source =
-    "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
-    "\n"
-    "typedef struct {\n"
-    "    uint subGroupSize;\n"
-    "    uint maxSubGroupSize;\n"
-    "    uint numSubGroups;\n"
-    "    uint enqNumSubGroups;\n"
-    "    uint subGroupId;\n"
-    "    uint subGroupLocalId;\n"
-    "} get_test_data;\n"
-    "\n"
-    "__kernel void get_test( __global get_test_data *outData )\n"
-    "{\n"
-    "    int gid = get_global_id( 0 );\n"
-    "    outData[gid].subGroupSize = get_sub_group_size();\n"
-    "    outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
-    "    outData[gid].numSubGroups = get_num_sub_groups();\n"
-    "    outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
-    "    outData[gid].subGroupId = get_sub_group_id();\n"
-    "    outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
-    "}";
-
 static int check_group(const get_test_data *result, int nw, cl_uint ensg,
                        int maxwgs)
 {
@@ -207,7 +184,8 @@ static int check_group(const get_test_data *result, int nw, cl_uint ensg,
 }
 
 int test_work_item_functions(cl_device_id device, cl_context context,
-                             cl_command_queue queue, int num_elements)
+                             cl_command_queue queue, int num_elements,
+                             bool useCoreSubgroups)
 {
     static const size_t lsize = 200;
     int error;
@@ -220,9 +198,37 @@ int test_work_item_functions(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper kernel;
     clMemWrapper out;
-
+    std::stringstream kernel_sstr;
+    if (useCoreSubgroups)
+    {
+        kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
+    }
+    kernel_sstr
+        << "\n"
+           "\n"
+           "typedef struct {\n"
+           "    uint subGroupSize;\n"
+           "    uint maxSubGroupSize;\n"
+           "    uint numSubGroups;\n"
+           "    uint enqNumSubGroups;\n"
+           "    uint subGroupId;\n"
+           "    uint subGroupLocalId;\n"
+           "} get_test_data;\n"
+           "\n"
+           "__kernel void get_test( __global get_test_data *outData )\n"
+           "{\n"
+           "    int gid = get_global_id( 0 );\n"
+           "    outData[gid].subGroupSize = get_sub_group_size();\n"
+           "    outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
+           "    outData[gid].numSubGroups = get_num_sub_groups();\n"
+           "    outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
+           "    outData[gid].subGroupId = get_sub_group_id();\n"
+           "    outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
+           "}";
+    const std::string &kernel_str = kernel_sstr.str();
+    const char *kernel_src = kernel_str.c_str();
     error = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &get_test_source, "get_test",
+        context, &program, &kernel, 1, &kernel_src, "get_test",
         "-cl-std=CL2.0");
     if (error != 0) return error;
 
@@ -301,3 +307,25 @@ int test_work_item_functions(cl_device_id device, cl_context context,
 
     return 0;
 }
+
+int test_work_item_functions_core(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    return test_work_item_functions(device, context, queue, num_elements, true);
+}
+
+int test_work_item_functions_ext(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements)
+{
+    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
+
+    if (!hasExtension)
+    {
+        log_info(
+            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return test_work_item_functions(device, context, queue, num_elements,
+                                    false);
+}
\ No newline at end of file