Initial open source release of OpenCL 2.0 CTS.

2026-03-20 14:39:01 +00:00 · 2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions
--- a/test_conformance/SVM/main.cpp
+++ b/test_conformance/SVM/main.cpp
@@ -0,0 +1,338 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <vector>
+#include <sstream>
+#include "../../test_common/harness/testHarness.h"
+
+#include "common.h"
+
+// SVM Atomic wrappers.
+// Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly.
+// Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called.
+// For now only Windows x86 is implemented, add support for other platforms as needed.
+cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
+{
+#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+  return *pValue;  // provided the value is aligned x86 doesn't need anything more than this for seq_cst.
+#elif defined(__GNUC__)
+	return __sync_add_and_fetch(pValue, 0);
+#else
+  log_error("ERROR: AtomicLoadExplicit function not implemented\n");
+  return -1;
+#endif
+}
+// all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter.
+cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
+{
+#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
+  return InterlockedExchangeAdd( (volatile LONG*) object, operand);
+#elif defined(__GNUC__)
+  return __sync_fetch_and_add(object, operand);
+#else
+  log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
+  return -1;
+#endif
+}
+
+cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
+{
+#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
+  return InterlockedExchange( (volatile LONG*) object, desired);
+#elif defined(__GNUC__)
+  return __sync_lock_test_and_set(object, desired);
+#else
+  log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
+  return -1;
+#endif
+}
+
+
+const char *linked_list_create_and_verify_kernels[] = {
+  "typedef struct Node {\n"
+  "    int global_id;\n"
+  "    int position_in_list;\n"
+  "    __global struct Node* pNext;\n"
+  "} Node;\n"
+  "\n"
+  // The allocation_index parameter must be initialized on the host to N work-items
+  // The first N nodes in pNodes will be the heads of the lists.
+  "__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
+  "{\n"
+  "    size_t i = get_global_id(0);\n"
+  "    __global Node *pNode = &pNodes[i];\n"
+  "\n"
+  "    pNode->global_id = i;\n"
+  "    pNode->position_in_list = 0;\n"
+  "\n"
+  "    __global Node *pNew;\n"
+  "    for(int j=1; j < list_length; j++)\n"
+  "    {\n"
+  "        pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
+  "        pNew->global_id = i;\n"
+  "        pNew->position_in_list = j;\n"
+  "        pNode->pNext = pNew;  // link new node onto end of list\n"
+  "        pNode = pNew;   // move to end of list\n"
+  "    }\n"
+  "}\n"
+
+  "__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
+  "{\n"
+  "    size_t i = get_global_id(0);\n"
+  "    __global Node *pNode = &pNodes[i];\n"
+  "\n"
+  "    for(int j=0; j < list_length; j++)\n"
+  "    {\n"
+  "        if( pNode->global_id == i && pNode->position_in_list == j)\n"
+  "        {\n"
+  "            atomic_inc(num_correct);\n"
+  "        } \n"
+  "        else {\n"
+  "            break;\n"
+  "        }\n"
+  "        pNode = pNode->pNext;\n"
+  "    }\n"
+  "}\n"
+};
+
+
+// The first N nodes in pNodes will be the heads of the lists.
+void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
+{
+  size_t allocation_index = num_lists;  // heads of lists are in first num_lists nodes.
+
+  for(cl_uint i = 0; i < num_lists; i++)
+  {
+    Node *pNode = &pNodes[i];
+    pNode->global_id = i;
+    pNode->position_in_list = 0;
+    Node *pNew;
+    for(int j=1; j < list_length; j++)
+    {
+      pNew = &pNodes[ allocation_index++ ];// allocate a new node
+      pNew->global_id = i;
+      pNew->position_in_list = j;
+      pNode->pNext = pNew;  // link new node onto end of list
+      pNode = pNew;   // move to end of list
+    }
+  }
+}
+
+cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
+{
+  cl_int error = CL_SUCCESS;
+  int numCorrect = 0;
+
+  log_info(" and verifying on host ");
+  for(cl_uint i=0; i < num_lists; i++)
+  {
+    Node *pNode = &pNodes[i];
+    for(int j=0; j < list_length; j++)
+    {
+      if( pNode->global_id == i && pNode->position_in_list == j)
+      {
+        numCorrect++;
+      }
+      else {
+        break;
+      }
+      pNode = pNode->pNext;
+    }
+  }
+  if(numCorrect != list_length * (cl_uint)num_lists)
+  {
+    error = -1;
+    log_info("Failed\n");
+  }
+  else
+    log_info("Passed\n");
+
+  return error;
+}
+
+// Note that we don't use the context provided by the test harness since it doesn't support multiple devices,
+// so we create are own context here that has all devices, we use the same platform that the harness used.
+cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps)
+{
+  cl_int error;
+
+  cl_platform_id platform_id;
+  // find out what platform the harness is using.
+  error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM,sizeof(cl_platform_id),&platform_id,NULL);
+  test_error(error,"clGetDeviceInfo failed");
+
+  error = clGetDeviceIDs(platform_id,  CL_DEVICE_TYPE_ALL, 0, NULL, num_devices );
+  test_error(error, "clGetDeviceIDs failed");
+
+  std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;
+
+  error = clGetDeviceIDs(platform_id,  CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL );
+  test_error(error, "clGetDeviceIDs failed");
+
+  devices.push_back(device_from_harness);
+  for (size_t i = 0; i < devicesTmp.size(); ++i)
+  {
+    if (device_from_harness != devicesTmp[i])
+      devices.push_back(devicesTmp[i]);
+  }
+
+  // Select only the devices that support the SVM level needed for the test.
+  // Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute).
+  cl_device_svm_capabilities caps;
+  cl_uint num_capable_devices = 0;
+  for(cl_uint i = 0; i < *num_devices; i++)
+  {
+    size_t ret_len = 0;
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 0, 0, &ret_len);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
+      return -1;
+    }
+
+    std::vector<char> oclVersion(ret_len + 1);
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(char) * oclVersion.size(), &oclVersion[0], 0);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
+      return -1;
+    }
+
+    std::string versionStr(&oclVersion[7]);
+    std::stringstream stream;
+    stream << versionStr;
+
+    double version = 0.0;
+    stream >> version;
+
+    if(device_from_harness != devices[i] && version < 2.0)
+    {
+      continue;
+    }
+
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
+    test_error(error,"clGetDeviceInfo failed for CL_DEVICE_MEM_SHARING");
+    if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER |  CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
+    {
+      log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
+      return -1;
+    }
+    if((caps & required_svm_caps) == required_svm_caps)
+    {
+      capable_devices.push_back(devices[i]);
+      ++num_capable_devices;
+    }
+  }
+  devices = capable_devices;  // the only devices we care about from here on are the ones capable of supporting the requested SVM level.
+  *num_devices = num_capable_devices;
+  if(num_capable_devices == 0)
+    //    if(svm_level > CL_DEVICE_COARSE_SVM && 0 == num_capable_devices)
+  {
+    log_info("Requested SVM level not supported by any device on this platform, test not executed.\n");
+    return 1; // 1 indicates do not execute, but counts as passing.
+  }
+
+  cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, NULL };
+  *context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
+  test_error(error, "Unable to create context" );
+
+  //    *queues = (cl_command_queue *) malloc( *num_devices * sizeof( cl_command_queue ) );
+
+  for(cl_uint i = 0; i < *num_devices; i++)
+  {
+    queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
+    test_error(error, "clCreateCommandQueue failed");
+  }
+
+  if(ppCodeString)
+  {
+    *program = clCreateProgramWithSource(*context, 1, ppCodeString , NULL, &error);
+    test_error( error, "clCreateProgramWithSource failed" );
+
+    error = clBuildProgram(*program,0,NULL,"-cl-std=CL2.0",NULL,NULL);
+    if (error != CL_SUCCESS)
+    {
+      print_error(error, "clBuildProgram failed");
+
+      char *buildLog = NULL;
+      size_t buildLogSize = 0;
+      error = clGetProgramBuildInfo (*program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize);
+      buildLog = (char*)malloc(buildLogSize);
+      memset(buildLog, 0, buildLogSize);
+      error = clGetProgramBuildInfo (*program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL);
+      char string[15000];
+      sprintf(string,"%s", buildLog);
+      //MessageBox(NULL,(LPCWSTR)string,(LPCWSTR)"OpenCL Error",MB_OK);
+      //MessageBox(NULL,string,"OpenCL Error",MB_OK);
+      free(buildLog);
+      log_info("%s",string);
+      if (error) {
+        print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
+        return -1;
+      }
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+basefn    basefn_list[] = {
+  test_byte_granularity,
+  test_set_kernel_exec_info_svm_ptrs,
+  test_fine_grain_memory_consistency,
+  test_fine_grain_sync_buffers,
+  test_shared_address_space_fine_grain,
+  test_shared_sub_buffers,
+  test_shared_address_space_fine_grain_buffers,
+  test_allocate_shared_buffer,
+  test_shared_address_space_coarse_grain_old_api,
+  test_shared_address_space_coarse_grain_new_api,
+  test_cross_buffer_pointers_coarse_grain,
+  test_svm_pointer_passing,
+  test_enqueue_api,
+};
+
+const char    *basefn_names[] = {
+  "svm_byte_granularity",
+  "svm_set_kernel_exec_info_svm_ptrs",
+  "svm_fine_grain_memory_consistency",
+  "svm_fine_grain_sync_buffers",
+  "svm_shared_address_space_fine_grain",
+  "svm_shared_sub_buffers",
+  "svm_shared_address_space_fine_grain_buffers",
+  "svm_allocate_shared_buffer",
+  "svm_shared_address_space_coarse_grain_old_api",
+  "svm_shared_address_space_coarse_grain_new_api",
+  "svm_cross_buffer_pointers_coarse_grain",
+  "svm_pointer_passing",
+  "svm_enqueue_api",
+};
+
+ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
+
+int    num_fns = sizeof(basefn_names) / sizeof(char *);
+
+
+int main(int argc, const char *argv[])
+{
+  return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 );
+}
+
+
+