Files
OpenCL-CTS/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
Sreelakshmi Haridas Maruthur 15b54aa0bd External memory updates (#1676)
* Vulkan: Fix descriptor sets

Use descriptor set arrays when programming
arrays for compute shader.

Change-Id: Idabab775a256a223660eb7a850e26f290453659e

* Vulkan: Fix queue propertyies

Transfer bit for queue family is not required
to be reported by the implementation, it is
implicit for compute.

Change-Id: I7424b00e25e35145433dd74b0b4dfe7eeeaf98c8

* Vulkan: Allow implementation to choose dedicated memory

Dedicated vs non-dedicated memory must be queried by
the app.  Implementations are not required to support
exportable non-dedicated memory.

Change-Id: Idbc46ace1be20f61d1b58b34756f6d79a7745911

* Fix formatting

Auto-generated formatting fix

* Fix bug in dedicated memory.

* Add check for if OpenCL assumes linear tiling

Change-Id: Idd2e24d9d69e1fbc3ccb4a279067533104185332

* Changed macro name to reflect spec

CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_HANDLE_TYPES_KHR to
CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR

Also changed some functions to not use the KHR variants.

---------

Co-authored-by: Joshua Kelly <joshkell@qti.qualcomm.com>
2023-09-05 09:04:38 -07:00

1951 lines
78 KiB
C++

//
// Copyright (c) 2022 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <vulkan_interop_common.hpp>
#include <vulkan_wrapper.hpp>
#include <CL/cl.h>
#include <CL/cl_ext.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <memory>
#include <string.h>
#include "harness/errorHelpers.h"
#define MAX_BUFFERS 5
#define MAX_IMPORTS 5
#define BUFFERSIZE 3000
static cl_uchar uuid[CL_UUID_SIZE_KHR];
static cl_device_id deviceId = NULL;
namespace {
struct Params
{
uint32_t numBuffers;
uint32_t bufferSize;
uint32_t interBufferOffset;
};
}
const char *kernel_text_numbuffer_1 = " \
__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\
int gid = get_global_id(0); \n\
if (gid < bufferSize) { \n\
a[gid]++; \n\
} \n\
}";
const char *kernel_text_numbuffer_2 = " \
__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b) { \n\
int gid = get_global_id(0); \n\
if (gid < bufferSize) { \n\
a[gid]++; \n\
b[gid]++;\n\
} \n\
}";
const char *kernel_text_numbuffer_4 = " \
__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b, __global unsigned char *c, __global unsigned char *d) { \n\
int gid = get_global_id(0); \n\
if (gid < bufferSize) { \n\
a[gid]++;\n\
b[gid]++; \n\
c[gid]++; \n\
d[gid]++; \n\
} \n\
}";
const char *kernel_text_verify = " \
__kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __global unsigned char *err) \n\
{ \n\
int idx = get_global_id(0); \n\
if ((idx < size) && (*err == 0)) { \n\
if (ptr[idx] != expVal){ \n\
*err = 1; \n\
} \n\
} \n\
}";
int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
cl_command_queue &cmd_queue2, cl_kernel *kernel,
cl_kernel &verify_kernel, VulkanDevice &vkDevice,
uint32_t numBuffers, uint32_t bufferSize,
bool use_fence)
{
int err = CL_SUCCESS;
size_t global_work_size[1];
uint8_t *error_2;
cl_mem error_1;
cl_kernel update_buffer_kernel;
cl_kernel kernel_cq;
clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
const char *program_source_const = kernel_text_numbuffer_2;
size_t program_source_length = strlen(program_source_const);
cl_program program = clCreateProgramWithSource(
context, 1, &program_source_const, &program_source_length, &err);
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed to build program \n");
return err;
}
// create the kernel
kernel_cq = clCreateKernel(program, "clUpdateBuffer", &err);
if (err != CL_SUCCESS)
{
print_error(err, "clCreateKernel failed \n");
return err;
}
const std::vector<VulkanExternalMemoryHandleType>
vkExternalMemoryHandleTypeList =
getSupportedVulkanExternalMemoryHandleTypeList();
VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
VulkanDeviceMemory vkParamsDeviceMemory(
vkDevice, vkParamsBuffer.getSize(),
getVulkanMemoryType(vkDevice,
VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
std::vector<clExternalMemory *> externalMemory;
for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
emhtIdx++)
{
VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
vkExternalMemoryHandleTypeList[emhtIdx];
log_info("External memory handle type: %d\n",
vkExternalMemoryHandleType);
VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
vkExternalMemoryHandleType);
const VulkanMemoryTypeList &memoryTypeList =
vkDummyBuffer.getMemoryTypeList();
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
{
const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
log_info("Memory type index: %d\n", (uint32_t)memoryType);
log_info("Memory type property: %d\n",
memoryType.getMemoryTypeProperty());
VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
vkExternalMemoryHandleType);
for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
{
vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
vkDevice, vkBufferList[bIdx], memoryType,
vkExternalMemoryHandleType));
externalMemory.push_back(new clExternalMemory(
vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
0, bufferSize, context, deviceId));
}
cl_mem buffers[MAX_BUFFERS];
clFinish(cmd_queue1);
Params *params = (Params *)vkParamsDeviceMemory.map();
params->numBuffers = numBuffers;
params->bufferSize = bufferSize;
params->interBufferOffset = 0;
vkParamsDeviceMemory.unmap();
vkDescriptorSet.update(0, vkParamsBuffer);
for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
{
size_t buffer_size = vkBufferList[bIdx].getSize();
vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
0);
buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
}
vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
vkCommandBuffer.begin();
vkCommandBuffer.bindPipeline(vkComputePipeline);
vkCommandBuffer.bindDescriptorSets(
vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
vkCommandBuffer.dispatch(512, 1, 1);
vkCommandBuffer.end();
if (vkBufferList.size() == 2)
{
update_buffer_kernel = kernel[0];
}
else if (vkBufferList.size() == 3)
{
update_buffer_kernel = kernel[1];
}
else if (vkBufferList.size() == 5)
{
update_buffer_kernel = kernel[2];
}
// global work size should be less than or equal to
// bufferSizeList[i]
global_work_size[0] = bufferSize;
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (use_fence)
{
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
(void *)&bufferSize);
err |= clSetKernelArg(kernel_cq, 0, sizeof(uint32_t),
(void *)&bufferSize);
err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem),
(void *)&(buffers[0]));
for (int i = 0; i < vkBufferList.size() - 1; i++)
{
err |=
clSetKernelArg(update_buffer_kernel, i + 1,
sizeof(cl_mem), (void *)&(buffers[i]));
}
err |=
clSetKernelArg(kernel_cq, 2, sizeof(cl_mem),
(void *)&(buffers[vkBufferList.size() - 1]));
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for kernel\n");
goto CLEANUP;
}
cl_event first_launch;
err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
1, NULL, global_work_size, NULL, 0,
NULL, &first_launch);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch update_buffer_kernel,"
"error\n");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(cmd_queue2, kernel_cq, 1, NULL,
global_work_size, NULL, 1,
&first_launch, NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch update_buffer_kernel,"
"error\n");
goto CLEANUP;
}
if (use_fence)
{
clFlush(cmd_queue1);
clFlush(cmd_queue2);
clFinish(cmd_queue1);
clFinish(cmd_queue2);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue2);
}
}
error_2 = (uint8_t *)malloc(sizeof(uint8_t));
if (NULL == error_2)
{
log_error("Not able to allocate memory\n");
goto CLEANUP;
}
clFinish(cmd_queue2);
error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(uint8_t), NULL, &err);
if (CL_SUCCESS != err)
{
print_error(err, "Error: clCreateBuffer \n");
goto CLEANUP;
}
uint8_t val = 0;
err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), &val, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error\n");
goto CLEANUP;
}
int calc_max_iter;
for (int i = 0; i < vkBufferList.size(); i++)
{
if (i == 0)
calc_max_iter = (maxIter * 3);
else
calc_max_iter = (maxIter * 2);
err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
(void *)&(buffers[i]));
err |=
clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
&calc_max_iter);
err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
(void *)&error_1);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for "
"verify_kernel \n");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
global_work_size, NULL, 0, NULL,
NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch verify_kernel,"
"error \n");
goto CLEANUP;
}
err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), error_2, 0, NULL,
NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error \n ");
goto CLEANUP;
}
if (*error_2 == 1)
{
log_error("&&&& vulkan_opencl_buffer test FAILED\n");
goto CLEANUP;
}
}
for (size_t i = 0; i < vkBufferList.size(); i++)
{
delete vkBufferListDeviceMemory[i];
delete externalMemory[i];
}
vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
vkBufferListDeviceMemory.begin()
+ numBuffers);
externalMemory.erase(externalMemory.begin(),
externalMemory.begin() + numBuffers);
}
}
CLEANUP:
for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
{
if (vkBufferListDeviceMemory[i])
{
delete vkBufferListDeviceMemory[i];
}
if (externalMemory[i])
{
delete externalMemory[i];
}
}
if (program) clReleaseProgram(program);
if (kernel_cq) clReleaseKernel(kernel_cq);
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
}
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
return err;
}
int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
cl_kernel *kernel, cl_kernel &verify_kernel,
VulkanDevice &vkDevice, uint32_t numBuffers,
uint32_t bufferSize, bool use_fence)
{
log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
size_t global_work_size[1];
uint8_t *error_2;
cl_mem error_1;
cl_kernel update_buffer_kernel;
clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
int err = CL_SUCCESS;
const std::vector<VulkanExternalMemoryHandleType>
vkExternalMemoryHandleTypeList =
getSupportedVulkanExternalMemoryHandleTypeList();
VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
VulkanDeviceMemory vkParamsDeviceMemory(
vkDevice, vkParamsBuffer.getSize(),
getVulkanMemoryType(vkDevice,
VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
std::vector<clExternalMemory *> externalMemory;
for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
emhtIdx++)
{
VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
vkExternalMemoryHandleTypeList[emhtIdx];
log_info("External memory handle type: %d\n",
vkExternalMemoryHandleType);
VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
vkExternalMemoryHandleType);
const VulkanMemoryTypeList &memoryTypeList =
vkDummyBuffer.getMemoryTypeList();
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
{
const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
log_info("Memory type index: %d\n", (uint32_t)memoryType);
log_info("Memory type property: %d\n",
memoryType.getMemoryTypeProperty());
VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
vkExternalMemoryHandleType);
for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
{
vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
vkDevice, vkBufferList[bIdx], memoryType,
vkExternalMemoryHandleType));
externalMemory.push_back(new clExternalMemory(
vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
0, bufferSize, context, deviceId));
}
cl_mem buffers[4];
clFinish(cmd_queue1);
Params *params = (Params *)vkParamsDeviceMemory.map();
params->numBuffers = numBuffers;
params->bufferSize = bufferSize;
params->interBufferOffset = 0;
vkParamsDeviceMemory.unmap();
vkDescriptorSet.update(0, vkParamsBuffer);
for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
{
size_t buffer_size = vkBufferList[bIdx].getSize();
vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
0);
buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
}
vkDescriptorSet.updateArray(1, vkBufferList.size(), vkBufferList);
vkCommandBuffer.begin();
vkCommandBuffer.bindPipeline(vkComputePipeline);
vkCommandBuffer.bindDescriptorSets(
vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
vkCommandBuffer.dispatch(512, 1, 1);
vkCommandBuffer.end();
if (vkBufferList.size() == 1)
{
update_buffer_kernel = kernel[0];
}
else if (vkBufferList.size() == 2)
{
update_buffer_kernel = kernel[1];
}
else if (vkBufferList.size() == 4)
{
update_buffer_kernel = kernel[2];
}
// global work size should be less than or equal to
// bufferSizeList[i]
global_work_size[0] = bufferSize;
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (use_fence)
{
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
(void *)&bufferSize);
for (int i = 0; i < vkBufferList.size(); i++)
{
err |=
clSetKernelArg(update_buffer_kernel, i + 1,
sizeof(cl_mem), (void *)&(buffers[i]));
}
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for kernel\n");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
1, NULL, global_work_size, NULL, 0,
NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch update_buffer_kernel,"
" error\n");
goto CLEANUP;
}
if (use_fence)
{
clFlush(cmd_queue1);
clFinish(cmd_queue1);
}
else if (!use_fence && (iter != (maxIter - 1)))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
}
error_2 = (uint8_t *)malloc(sizeof(uint8_t));
if (NULL == error_2)
{
log_error("Not able to allocate memory\n");
goto CLEANUP;
}
error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(uint8_t), NULL, &err);
if (CL_SUCCESS != err)
{
print_error(err, "Error: clCreateBuffer \n");
goto CLEANUP;
}
uint8_t val = 0;
err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), &val, 0, NULL, NULL);
if (CL_SUCCESS != err)
{
print_error(err, "Error: clEnqueueWriteBuffer \n");
goto CLEANUP;
}
int calc_max_iter = (maxIter * 2);
for (int i = 0; i < vkBufferList.size(); i++)
{
err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
(void *)&(buffers[i]));
err |=
clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
&calc_max_iter);
err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
(void *)&error_1);
if (err != CL_SUCCESS)
{
print_error(
err,
"Error: Failed to set arg values for verify_kernel \n");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
global_work_size, NULL, 0, NULL,
NULL);
if (err != CL_SUCCESS)
{
print_error(
err, "Error: Failed to launch verify_kernel, error\n");
goto CLEANUP;
}
err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), error_2, 0, NULL,
NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error \n");
goto CLEANUP;
}
if (*error_2 == 1)
{
log_error("&&&& vulkan_opencl_buffer test FAILED\n");
goto CLEANUP;
}
}
for (size_t i = 0; i < vkBufferList.size(); i++)
{
delete vkBufferListDeviceMemory[i];
delete externalMemory[i];
}
vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
vkBufferListDeviceMemory.begin()
+ numBuffers);
externalMemory.erase(externalMemory.begin(),
externalMemory.begin() + numBuffers);
}
}
CLEANUP:
for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
{
if (vkBufferListDeviceMemory[i])
{
delete vkBufferListDeviceMemory[i];
}
if (externalMemory[i])
{
delete externalMemory[i];
}
}
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
}
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
return err;
}
int run_test_with_multi_import_same_ctx(
cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel,
cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers,
uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence)
{
size_t global_work_size[1];
uint8_t *error_2;
cl_mem error_1;
int numImports = numBuffers;
cl_kernel update_buffer_kernel[MAX_IMPORTS];
clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
int err = CL_SUCCESS;
int calc_max_iter;
bool withOffset;
uint32_t pBufferSize;
const std::vector<VulkanExternalMemoryHandleType>
vkExternalMemoryHandleTypeList =
getSupportedVulkanExternalMemoryHandleTypeList();
VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
VulkanDeviceMemory vkParamsDeviceMemory(
vkDevice, vkParamsBuffer.getSize(),
getVulkanMemoryType(vkDevice,
VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
std::vector<std::vector<clExternalMemory *>> externalMemory;
for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
emhtIdx++)
{
VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
vkExternalMemoryHandleTypeList[emhtIdx];
log_info("External memory handle type: %d\n",
vkExternalMemoryHandleType);
VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
vkExternalMemoryHandleType);
const VulkanMemoryTypeList &memoryTypeList =
vkDummyBuffer.getMemoryTypeList();
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
{
const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
log_info("Memory type index: %d\n", (uint32_t)memoryType);
log_info("Memory type property: %d\n",
memoryType.getMemoryTypeProperty());
for (unsigned int withOffset = 0;
withOffset <= (unsigned int)enableOffset; withOffset++)
{
log_info("Running withOffset case %d\n", (uint32_t)withOffset);
if (withOffset)
{
pBufferSize = bufferSizeForOffset;
}
else
{
pBufferSize = bufferSize;
}
cl_mem buffers[MAX_BUFFERS][MAX_IMPORTS];
VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
vkExternalMemoryHandleType);
uint32_t interBufferOffset =
(uint32_t)(vkBufferList[0].getSize());
for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
{
if (withOffset == 0)
{
vkBufferListDeviceMemory.push_back(
new VulkanDeviceMemory(vkDevice, vkBufferList[bIdx],
memoryType,
vkExternalMemoryHandleType));
}
if (withOffset == 1)
{
uint32_t totalSize =
(uint32_t)(vkBufferList.size() * interBufferOffset);
vkBufferListDeviceMemory.push_back(
new VulkanDeviceMemory(vkDevice, totalSize,
memoryType,
vkExternalMemoryHandleType));
}
std::vector<clExternalMemory *> pExternalMemory;
for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
{
pExternalMemory.push_back(new clExternalMemory(
vkBufferListDeviceMemory[bIdx],
vkExternalMemoryHandleType,
withOffset * bIdx * interBufferOffset, pBufferSize,
context, deviceId));
}
externalMemory.push_back(pExternalMemory);
}
clFinish(cmd_queue1);
Params *params = (Params *)vkParamsDeviceMemory.map();
params->numBuffers = numBuffers;
params->bufferSize = pBufferSize;
params->interBufferOffset = interBufferOffset * withOffset;
vkParamsDeviceMemory.unmap();
vkDescriptorSet.update(0, vkParamsBuffer);
for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
{
size_t buffer_size = vkBufferList[bIdx].getSize();
vkBufferListDeviceMemory[bIdx]->bindBuffer(
vkBufferList[bIdx],
bIdx * interBufferOffset * withOffset);
for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
{
buffers[bIdx][cl_bIdx] =
externalMemory[bIdx][cl_bIdx]
->getExternalMemoryBuffer();
}
}
vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
vkCommandBuffer.begin();
vkCommandBuffer.bindPipeline(vkComputePipeline);
vkCommandBuffer.bindDescriptorSets(
vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
vkCommandBuffer.dispatch(512, 1, 1);
vkCommandBuffer.end();
for (int i = 0; i < numImports; i++)
{
update_buffer_kernel[i] = (numBuffers == 1)
? kernel[0]
: ((numBuffers == 2) ? kernel[1] : kernel[2]);
}
// global work size should be less than or equal to
// bufferSizeList[i]
global_work_size[0] = pBufferSize;
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (use_fence)
{
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
}
if (use_fence)
{
fence->wait();
}
else
{
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
{
err = clSetKernelArg(update_buffer_kernel[launchIter],
0, sizeof(uint32_t),
(void *)&pBufferSize);
for (int i = 0; i < numBuffers; i++)
{
err |= clSetKernelArg(
update_buffer_kernel[launchIter], i + 1,
sizeof(cl_mem),
(void *)&(buffers[i][launchIter]));
}
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for "
"kernel\n ");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(
cmd_queue1, update_buffer_kernel[launchIter], 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch "
"update_buffer_kernel, error\n ");
goto CLEANUP;
}
}
if (use_fence)
{
clFinish(cmd_queue1);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
}
error_2 = (uint8_t *)malloc(sizeof(uint8_t));
if (NULL == error_2)
{
log_error("Not able to allocate memory\n");
goto CLEANUP;
}
error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(uint8_t), NULL, &err);
if (CL_SUCCESS != err)
{
print_error(err, "Error: clCreateBuffer \n");
goto CLEANUP;
}
uint8_t val = 0;
err =
clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), &val, 0, NULL, NULL);
if (CL_SUCCESS != err)
{
print_error(err, "Error: clEnqueueWriteBuffer \n");
goto CLEANUP;
}
calc_max_iter = maxIter * (numBuffers + 1);
for (int i = 0; i < vkBufferList.size(); i++)
{
err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
(void *)&(buffers[i][0]));
err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
&pBufferSize);
err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
&calc_max_iter);
err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
(void *)&error_1);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for "
"verify_kernel \n");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
NULL, global_work_size, NULL,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(
err,
"Error: Failed to launch verify_kernel, error\n");
goto CLEANUP;
}
err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), error_2, 0, NULL,
NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error \n");
goto CLEANUP;
}
if (*error_2 == 1)
{
log_error("&&&& vulkan_opencl_buffer test FAILED\n");
goto CLEANUP;
}
}
for (size_t i = 0; i < vkBufferList.size(); i++)
{
for (size_t j = 0; j < numImports; j++)
{
delete externalMemory[i][j];
}
}
for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
{
delete vkBufferListDeviceMemory[i];
}
vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
vkBufferListDeviceMemory.end());
for (size_t i = 0; i < externalMemory.size(); i++)
{
externalMemory[i].erase(externalMemory[i].begin(),
externalMemory[i].begin()
+ numBuffers);
}
externalMemory.clear();
}
}
}
CLEANUP:
for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
{
if (vkBufferListDeviceMemory[i])
{
delete vkBufferListDeviceMemory[i];
}
}
for (size_t i = 0; i < externalMemory.size(); i++)
{
for (size_t j = 0; j < externalMemory[i].size(); j++)
{
if (externalMemory[i][j])
{
delete externalMemory[i][j];
}
}
}
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
}
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
return err;
}
int run_test_with_multi_import_diff_ctx(
cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1,
cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2,
cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice,
uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset,
float use_fence)
{
size_t global_work_size[1];
uint8_t *error_3;
cl_mem error_1;
cl_mem error_2;
int numImports = numBuffers;
cl_kernel update_buffer_kernel1[MAX_IMPORTS];
cl_kernel update_buffer_kernel2[MAX_IMPORTS];
clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
clExternalSemaphore *clVk2CLExternalSemaphore2 = NULL;
clExternalSemaphore *clCl2VkExternalSemaphore2 = NULL;
int err = CL_SUCCESS;
int calc_max_iter;
bool withOffset;
uint32_t pBufferSize;
const std::vector<VulkanExternalMemoryHandleType>
vkExternalMemoryHandleTypeList =
getSupportedVulkanExternalMemoryHandleTypeList();
VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clVk2CLExternalSemaphore2 =
new clExternalSemaphore(vkVk2CLSemaphore, context2,
vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore2 =
new clExternalSemaphore(vkCl2VkSemaphore, context2,
vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
VulkanDeviceMemory vkParamsDeviceMemory(
vkDevice, vkParamsBuffer.getSize(),
getVulkanMemoryType(vkDevice,
VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
std::vector<std::vector<clExternalMemory *>> externalMemory1;
std::vector<std::vector<clExternalMemory *>> externalMemory2;
for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
emhtIdx++)
{
VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
vkExternalMemoryHandleTypeList[emhtIdx];
log_info("External memory handle type:%d\n",
vkExternalMemoryHandleType);
VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
vkExternalMemoryHandleType);
const VulkanMemoryTypeList &memoryTypeList =
vkDummyBuffer.getMemoryTypeList();
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
{
const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
log_info("Memory type index: %d\n", (uint32_t)memoryType);
log_info("Memory type property: %d\n",
memoryType.getMemoryTypeProperty());
for (unsigned int withOffset = 0;
withOffset <= (unsigned int)enableOffset; withOffset++)
{
log_info("Running withOffset case %d\n", (uint32_t)withOffset);
cl_mem buffers1[MAX_BUFFERS][MAX_IMPORTS];
cl_mem buffers2[MAX_BUFFERS][MAX_IMPORTS];
if (withOffset)
{
pBufferSize = bufferSizeForOffset;
}
else
{
pBufferSize = bufferSize;
}
VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
vkExternalMemoryHandleType);
uint32_t interBufferOffset =
(uint32_t)(vkBufferList[0].getSize());
for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
{
if (withOffset == 0)
{
vkBufferListDeviceMemory.push_back(
new VulkanDeviceMemory(vkDevice, pBufferSize,
memoryType,
vkExternalMemoryHandleType));
}
if (withOffset == 1)
{
uint32_t totalSize =
(uint32_t)(vkBufferList.size() * interBufferOffset);
vkBufferListDeviceMemory.push_back(
new VulkanDeviceMemory(vkDevice, totalSize,
memoryType,
vkExternalMemoryHandleType));
}
std::vector<clExternalMemory *> pExternalMemory1;
std::vector<clExternalMemory *> pExternalMemory2;
for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
{
pExternalMemory1.push_back(new clExternalMemory(
vkBufferListDeviceMemory[bIdx],
vkExternalMemoryHandleType,
withOffset * bIdx * interBufferOffset, pBufferSize,
context, deviceId));
pExternalMemory2.push_back(new clExternalMemory(
vkBufferListDeviceMemory[bIdx],
vkExternalMemoryHandleType,
withOffset * bIdx * interBufferOffset, pBufferSize,
context2, deviceId));
}
externalMemory1.push_back(pExternalMemory1);
externalMemory2.push_back(pExternalMemory2);
}
clFinish(cmd_queue1);
Params *params = (Params *)vkParamsDeviceMemory.map();
params->numBuffers = numBuffers;
params->bufferSize = pBufferSize;
params->interBufferOffset = interBufferOffset * withOffset;
vkParamsDeviceMemory.unmap();
vkDescriptorSet.update(0, vkParamsBuffer);
for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
{
size_t buffer_size = vkBufferList[bIdx].getSize();
vkBufferListDeviceMemory[bIdx]->bindBuffer(
vkBufferList[bIdx],
bIdx * interBufferOffset * withOffset);
for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
{
buffers1[bIdx][cl_bIdx] =
externalMemory1[bIdx][cl_bIdx]
->getExternalMemoryBuffer();
buffers2[bIdx][cl_bIdx] =
externalMemory2[bIdx][cl_bIdx]
->getExternalMemoryBuffer();
}
vkDescriptorSet.update((uint32_t)bIdx + 1,
vkBufferList[bIdx]);
}
vkCommandBuffer.begin();
vkCommandBuffer.bindPipeline(vkComputePipeline);
vkCommandBuffer.bindDescriptorSets(
vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
vkCommandBuffer.dispatch(512, 1, 1);
vkCommandBuffer.end();
for (int i = 0; i < numImports; i++)
{
update_buffer_kernel1[i] = (numBuffers == 1)
? kernel1[0]
: ((numBuffers == 2) ? kernel1[1] : kernel1[2]);
update_buffer_kernel2[i] = (numBuffers == 1)
? kernel2[0]
: ((numBuffers == 2) ? kernel2[1] : kernel2[2]);
}
// global work size should be less than or equal
// to bufferSizeList[i]
global_work_size[0] = pBufferSize;
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (use_fence)
{
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
}
if (use_fence)
{
fence->wait();
}
else
{
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
{
err = clSetKernelArg(update_buffer_kernel1[launchIter],
0, sizeof(uint32_t),
(void *)&pBufferSize);
for (int i = 0; i < numBuffers; i++)
{
err |= clSetKernelArg(
update_buffer_kernel1[launchIter], i + 1,
sizeof(cl_mem),
(void *)&(buffers1[i][launchIter]));
}
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for "
"kernel\n ");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(
cmd_queue1, update_buffer_kernel1[launchIter], 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch "
"update_buffer_kernel, error\n");
goto CLEANUP;
}
}
if (use_fence)
{
clFinish(cmd_queue1);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
}
clFinish(cmd_queue1);
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (use_fence)
{
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
}
if (use_fence)
{
fence->wait();
}
else
{
clVk2CLExternalSemaphore2->wait(cmd_queue2);
}
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
{
err = clSetKernelArg(update_buffer_kernel2[launchIter],
0, sizeof(uint32_t),
(void *)&bufferSize);
for (int i = 0; i < numBuffers; i++)
{
err |= clSetKernelArg(
update_buffer_kernel2[launchIter], i + 1,
sizeof(cl_mem),
(void *)&(buffers2[i][launchIter]));
}
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for "
"kernel\n ");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(
cmd_queue2, update_buffer_kernel2[launchIter], 1,
NULL, global_work_size, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch "
"update_buffer_kernel, error\n ");
goto CLEANUP;
}
}
if (use_fence)
{
clFinish(cmd_queue2);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore2->signal(cmd_queue2);
}
}
clFinish(cmd_queue2);
error_3 = (uint8_t *)malloc(sizeof(uint8_t));
if (NULL == error_3)
{
log_error("Not able to allocate memory\n");
goto CLEANUP;
}
error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(uint8_t), NULL, &err);
if (CL_SUCCESS != err)
{
print_error(err, "Error: clCreateBuffer \n");
goto CLEANUP;
}
error_2 = clCreateBuffer(context2, CL_MEM_WRITE_ONLY,
sizeof(uint8_t), NULL, &err);
if (CL_SUCCESS != err)
{
print_error(err, "Error: clCreateBuffer \n");
goto CLEANUP;
}
uint8_t val = 0;
err =
clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), &val, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error \n");
goto CLEANUP;
}
err =
clEnqueueWriteBuffer(cmd_queue2, error_2, CL_TRUE, 0,
sizeof(uint8_t), &val, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error \n");
goto CLEANUP;
}
calc_max_iter = maxIter * 2 * (numBuffers + 1);
for (int i = 0; i < numBuffers; i++)
{
err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
(void *)&(buffers1[i][0]));
err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
&pBufferSize);
err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
&calc_max_iter);
err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
(void *)&error_1);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for "
"verify_kernel \n");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
NULL, global_work_size, NULL,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch verify_kernel,"
"error\n");
goto CLEANUP;
}
err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
sizeof(uint8_t), error_3, 0, NULL,
NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error\n");
goto CLEANUP;
}
if (*error_3 == 1)
{
log_error("&&&& vulkan_opencl_buffer test FAILED\n");
goto CLEANUP;
}
}
*error_3 = 0;
for (int i = 0; i < vkBufferList.size(); i++)
{
err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem),
(void *)&(buffers2[i][0]));
err |= clSetKernelArg(verify_kernel2, 1, sizeof(int),
&pBufferSize);
err |= clSetKernelArg(verify_kernel2, 2, sizeof(int),
&calc_max_iter);
err |= clSetKernelArg(verify_kernel2, 3, sizeof(cl_mem),
(void *)&error_2);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to set arg values for "
"verify_kernel \n");
goto CLEANUP;
}
err = clEnqueueNDRangeKernel(cmd_queue2, verify_kernel2, 1,
NULL, global_work_size, NULL,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err,
"Error: Failed to launch verify_kernel,"
"error\n");
goto CLEANUP;
}
err = clEnqueueReadBuffer(cmd_queue2, error_2, CL_TRUE, 0,
sizeof(uint8_t), error_3, 0, NULL,
NULL);
if (err != CL_SUCCESS)
{
print_error(err, "Error: Failed read output, error\n");
goto CLEANUP;
}
if (*error_3 == 1)
{
log_error("&&&& vulkan_opencl_buffer test FAILED\n");
goto CLEANUP;
}
}
for (size_t i = 0; i < vkBufferList.size(); i++)
{
for (size_t j = 0; j < numImports; j++)
{
delete externalMemory1[i][j];
delete externalMemory2[i][j];
}
}
for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
{
delete vkBufferListDeviceMemory[i];
}
vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
vkBufferListDeviceMemory.end());
for (size_t i = 0; i < externalMemory1.size(); i++)
{
externalMemory1[i].erase(externalMemory1[i].begin(),
externalMemory1[i].begin()
+ numBuffers);
externalMemory2[i].erase(externalMemory2[i].begin(),
externalMemory2[i].begin()
+ numBuffers);
}
externalMemory1.clear();
externalMemory2.clear();
}
}
}
CLEANUP:
for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
{
if (vkBufferListDeviceMemory[i])
{
delete vkBufferListDeviceMemory[i];
}
}
for (size_t i = 0; i < externalMemory1.size(); i++)
{
for (size_t j = 0; j < externalMemory1[i].size(); j++)
{
if (externalMemory1[i][j])
{
delete externalMemory1[i][j];
}
}
}
for (size_t i = 0; i < externalMemory2.size(); i++)
{
for (size_t j = 0; j < externalMemory2[i].size(); j++)
{
if (externalMemory2[i][j])
{
delete externalMemory2[i][j];
}
}
}
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
}
if (error_3) free(error_3);
if (error_1) clReleaseMemObject(error_1);
if (error_2) clReleaseMemObject(error_2);
return err;
}
int test_buffer_common(cl_device_id device_, cl_context context_,
cl_command_queue queue_, int numElements_,
float use_fence)
{
int current_device = 0;
int device_count = 0;
int devices_prohibited = 0;
cl_int errNum = CL_SUCCESS;
cl_platform_id platform = NULL;
size_t extensionSize = 0;
cl_uint num_devices = 0;
cl_uint device_no = 0;
const size_t bufsize = BUFFERSIZE;
char buf[BUFFERSIZE];
cl_device_id *devices;
char *extensions = NULL;
cl_kernel verify_kernel;
cl_kernel verify_kernel2;
cl_kernel kernel[3] = { NULL, NULL, NULL };
cl_kernel kernel2[3] = { NULL, NULL, NULL };
const char *program_source_const[3] = { kernel_text_numbuffer_1,
kernel_text_numbuffer_2,
kernel_text_numbuffer_4 };
const char *program_source_const_verify;
size_t program_source_length;
cl_command_queue cmd_queue1 = NULL;
cl_command_queue cmd_queue2 = NULL;
cl_command_queue cmd_queue3 = NULL;
cl_context context = NULL;
cl_program program[3] = { NULL, NULL, NULL };
cl_program program_verify, program_verify2;
cl_context context2 = NULL;
VulkanDevice vkDevice;
uint32_t numBuffersList[] = { 1, 2, 4 };
uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 };
uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 };
cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
errNum = clGetPlatformIDs(1, &platform, NULL);
if (errNum != CL_SUCCESS)
{
print_error(errNum, "Error: Failed to get platform\n");
goto CLEANUP;
}
errNum =
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
if (CL_SUCCESS != errNum)
{
print_error(errNum, "clGetDeviceIDs failed in returning of devices\n");
goto CLEANUP;
}
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
if (NULL == devices)
{
errNum = CL_OUT_OF_HOST_MEMORY;
print_error(errNum, "Unable to allocate memory for devices\n");
goto CLEANUP;
}
errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
NULL);
if (CL_SUCCESS != errNum)
{
print_error(errNum, "Failed to get deviceID.\n");
goto CLEANUP;
}
contextProperties[1] = (cl_context_properties)platform;
log_info("Assigned contextproperties for platform\n");
for (device_no = 0; device_no < num_devices; device_no++)
{
errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0,
NULL, &extensionSize);
if (CL_SUCCESS != errNum)
{
print_error(errNum,
"Error in clGetDeviceInfo for getting device_extension "
"size....\n");
goto CLEANUP;
}
extensions = (char *)malloc(extensionSize);
if (NULL == extensions)
{
print_error(errNum, "Unable to allocate memory for extensions\n");
errNum = CL_OUT_OF_HOST_MEMORY;
goto CLEANUP;
}
errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
extensionSize, extensions, NULL);
if (CL_SUCCESS != errNum)
{
print_error(errNum,
"Error in clGetDeviceInfo for device_extension\n");
goto CLEANUP;
}
errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
CL_UUID_SIZE_KHR, uuid, &extensionSize);
if (CL_SUCCESS != errNum)
{
print_error(errNum, "clGetDeviceInfo failed\n");
goto CLEANUP;
}
errNum =
memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
if (errNum == 0)
{
break;
}
}
if (device_no >= num_devices)
{
errNum = EXIT_FAILURE;
print_error(errNum,
"OpenCL error: "
"No Vulkan-OpenCL Interop capable GPU found.\n");
goto CLEANUP;
}
deviceId = devices[device_no];
context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
NULL, NULL, &errNum);
if (CL_SUCCESS != errNum)
{
print_error(errNum, "error creating context\n");
goto CLEANUP;
}
log_info("Successfully created context !!!\n");
cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
if (CL_SUCCESS != errNum)
{
errNum = CL_INVALID_COMMAND_QUEUE;
print_error(errNum, "Error: Failed to create command queue!\n");
goto CLEANUP;
}
cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
if (CL_SUCCESS != errNum)
{
errNum = CL_INVALID_COMMAND_QUEUE;
print_error(errNum, "Error: Failed to create command queue!\n");
goto CLEANUP;
}
log_info("clCreateCommandQueue successful\n");
for (int i = 0; i < 3; i++)
{
program_source_length = strlen(program_source_const[i]);
program[i] =
clCreateProgramWithSource(context, 1, &program_source_const[i],
&program_source_length, &errNum);
errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
if (errNum != CL_SUCCESS)
{
print_error(errNum, "Error: Failed to build program \n");
return errNum;
}
// create the kernel
kernel[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum);
if (errNum != CL_SUCCESS)
{
print_error(errNum, "clCreateKernel failed \n");
return errNum;
}
}
program_source_const_verify = kernel_text_verify;
program_source_length = strlen(program_source_const_verify);
program_verify =
clCreateProgramWithSource(context, 1, &program_source_const_verify,
&program_source_length, &errNum);
errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL);
if (errNum != CL_SUCCESS)
{
log_error("Error: Failed to build program2\n");
return errNum;
}
verify_kernel = clCreateKernel(program_verify, "checkKernel", &errNum);
if (errNum != CL_SUCCESS)
{
print_error(errNum, "clCreateKernel failed \n");
return errNum;
}
if (multiCtx) // different context guard
{
context2 = clCreateContextFromType(
contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
if (CL_SUCCESS != errNum)
{
print_error(errNum, "error creating context\n");
goto CLEANUP;
}
cmd_queue3 =
clCreateCommandQueue(context2, devices[device_no], 0, &errNum);
if (CL_SUCCESS != errNum)
{
errNum = CL_INVALID_COMMAND_QUEUE;
print_error(errNum, "Error: Failed to create command queue!\n");
goto CLEANUP;
}
for (int i = 0; i < 3; i++)
{
program_source_length = strlen(program_source_const[i]);
program[i] =
clCreateProgramWithSource(context2, 1, &program_source_const[i],
&program_source_length, &errNum);
errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
if (errNum != CL_SUCCESS)
{
print_error(errNum, "Error: Failed to build program \n");
return errNum;
}
// create the kernel
kernel2[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum);
if (errNum != CL_SUCCESS)
{
print_error(errNum, "clCreateKernel failed \n");
return errNum;
}
}
program_source_length = strlen(program_source_const_verify);
program_verify =
clCreateProgramWithSource(context2, 1, &program_source_const_verify,
&program_source_length, &errNum);
errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL);
if (errNum != CL_SUCCESS)
{
log_error("Error: Failed to build program2\n");
return errNum;
}
verify_kernel2 = clCreateKernel(program_verify, "checkKernel", &errNum);
if (errNum != CL_SUCCESS)
{
print_error(errNum, "clCreateKernel failed \n");
return errNum;
}
}
for (size_t numBuffersIdx = 0; numBuffersIdx < ARRAY_SIZE(numBuffersList);
numBuffersIdx++)
{
uint32_t numBuffers = numBuffersList[numBuffersIdx];
log_info("Number of buffers: %d\n", numBuffers);
for (size_t sizeIdx = 0; sizeIdx < ARRAY_SIZE(bufferSizeList);
sizeIdx++)
{
uint32_t bufferSize = bufferSizeList[sizeIdx];
uint32_t bufferSizeForOffset = bufferSizeListforOffset[sizeIdx];
log_info("&&&& RUNNING vulkan_opencl_buffer test for Buffer size: "
"%d\n",
bufferSize);
if (multiImport && !multiCtx)
{
errNum = run_test_with_multi_import_same_ctx(
context, cmd_queue1, kernel, verify_kernel, vkDevice,
numBuffers, bufferSize, bufferSizeForOffset, use_fence);
}
else if (multiImport && multiCtx)
{
errNum = run_test_with_multi_import_diff_ctx(
context, context2, cmd_queue1, cmd_queue3, kernel, kernel2,
verify_kernel, verify_kernel2, vkDevice, numBuffers,
bufferSize, bufferSizeForOffset, use_fence);
}
else if (numCQ == 2)
{
errNum = run_test_with_two_queue(
context, cmd_queue1, cmd_queue2, kernel, verify_kernel,
vkDevice, numBuffers + 1, bufferSize, use_fence);
}
else
{
errNum = run_test_with_one_queue(
context, cmd_queue1, kernel, verify_kernel, vkDevice,
numBuffers, bufferSize, use_fence);
}
if (errNum != CL_SUCCESS)
{
print_error(errNum, "func_name failed \n");
goto CLEANUP;
}
}
}
CLEANUP:
for (int i = 0; i < 3; i++)
{
if (program[i]) clReleaseProgram(program[i]);
if (kernel[i]) clReleaseKernel(kernel[i]);
}
if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
if (cmd_queue3) clReleaseCommandQueue(cmd_queue3);
if (context) clReleaseContext(context);
if (context2) clReleaseContext(context2);
if (devices) free(devices);
if (extensions) free(extensions);
return errNum;
}