Add tests for external sharing not dependant on semaphores. (#1648)

* Add tests for external sharing not dependant on semaphores.

Additional external sharing tests that use fences instead of semaphores.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix clang-format

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Apply changes for review.

Apply changes for review:
- Make VkFence + clFinish a synchronization option to existing tests
instead of creating a separate test that uses fence.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix build break.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix resource release conditions.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix fence usage.

Fixed following fence issues:
- Add missing link to command buffer
- Add fence reset before wait

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Add Vulkan wrapper for fence.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Rework fence reset.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Change synchronisation mechanisms.

Changes made:
- wait for fence with clFinish
- queue submit with wait for fence

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Replace clFinish with vkWaitForFences.

Replaced clFinish with vkWaitForFences in Vulkan exectution context.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Replace remaining clFinish with vkWaitForFences.

Replaced remaining clFinish with vkWaitForFences in Vulkan exectution context.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix review comments for synchoronisation simplification.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix review comments for synchoronisation simplification for remaining tests.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix condition check.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

---------

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>
This commit is contained in:
Paweł Jastrzębski
2023-07-11 17:55:37 +02:00
committed by GitHub
parent 2686b9e2c1
commit 1ab4b26821
4 changed files with 338 additions and 77 deletions

View File

@@ -21,6 +21,7 @@
#include <assert.h>
#include <vector>
#include <iostream>
#include <memory>
#include <string.h>
#include "harness/errorHelpers.h"
@@ -82,7 +83,8 @@ __kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __g
int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
cl_command_queue &cmd_queue2, cl_kernel *kernel,
cl_kernel &verify_kernel, VulkanDevice &vkDevice,
uint32_t numBuffers, uint32_t bufferSize)
uint32_t numBuffers, uint32_t bufferSize,
bool use_fence)
{
int err = CL_SUCCESS;
size_t global_work_size[1];
@@ -117,6 +119,7 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
@@ -136,10 +139,17 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
@@ -227,16 +237,27 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (iter == 0)
if (use_fence)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
(void *)&bufferSize);
@@ -286,7 +307,14 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
goto CLEANUP;
}
if (iter != (maxIter - 1))
if (use_fence)
{
clFlush(cmd_queue1);
clFlush(cmd_queue2);
clFinish(cmd_queue1);
clFinish(cmd_queue2);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue2);
}
@@ -387,8 +415,11 @@ CLEANUP:
}
if (program) clReleaseProgram(program);
if (kernel_cq) clReleaseKernel(kernel_cq);
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
}
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
@@ -398,7 +429,7 @@ CLEANUP:
int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
cl_kernel *kernel, cl_kernel &verify_kernel,
VulkanDevice &vkDevice, uint32_t numBuffers,
uint32_t bufferSize)
uint32_t bufferSize, bool use_fence)
{
log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
size_t global_work_size[1];
@@ -416,6 +447,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
@@ -434,10 +466,18 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
@@ -526,16 +566,26 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (iter == 0)
if (use_fence)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
(void *)&bufferSize);
@@ -562,7 +612,12 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
" error\n");
goto CLEANUP;
}
if (iter != (maxIter - 1))
if (use_fence)
{
clFlush(cmd_queue1);
clFinish(cmd_queue1);
}
else if (!use_fence && (iter != (maxIter - 1)))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
@@ -656,8 +711,13 @@ CLEANUP:
delete externalMemory[i];
}
}
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
}
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
return err;
@@ -666,7 +726,7 @@ CLEANUP:
int run_test_with_multi_import_same_ctx(
cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel,
cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers,
uint32_t bufferSize, uint32_t bufferSizeForOffset)
uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence)
{
size_t global_work_size[1];
uint8_t *error_2;
@@ -687,6 +747,7 @@ int run_test_with_multi_import_same_ctx(
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
@@ -706,10 +767,18 @@ int run_test_with_multi_import_same_ctx(
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
@@ -832,16 +901,34 @@ int run_test_with_multi_import_same_ctx(
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (iter == 0)
if (use_fence)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
if (use_fence)
{
fence->wait();
}
else
{
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
{
@@ -874,7 +961,11 @@ int run_test_with_multi_import_same_ctx(
goto CLEANUP;
}
}
if (iter != (maxIter - 1))
if (use_fence)
{
clFinish(cmd_queue1);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
@@ -987,8 +1078,13 @@ CLEANUP:
}
}
}
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
}
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
return err;
@@ -998,7 +1094,8 @@ int run_test_with_multi_import_diff_ctx(
cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1,
cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2,
cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice,
uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset)
uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset,
float use_fence)
{
size_t global_work_size[1];
uint8_t *error_3;
@@ -1023,6 +1120,7 @@ int run_test_with_multi_import_diff_ctx(
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
@@ -1042,15 +1140,24 @@ int run_test_with_multi_import_diff_ctx(
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
if (use_fence)
{
fence = std::make_shared<VulkanFence>(vkDevice);
}
else
{
clVk2CLExternalSemaphore = new clExternalSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
clVk2CLExternalSemaphore2 = new clExternalSemaphore(
vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore2 = new clExternalSemaphore(
vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId);
clVk2CLExternalSemaphore2 =
new clExternalSemaphore(vkVk2CLSemaphore, context2,
vkExternalSemaphoreHandleType, deviceId);
clCl2VkExternalSemaphore2 =
new clExternalSemaphore(vkCl2VkSemaphore, context2,
vkExternalSemaphoreHandleType, deviceId);
}
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
@@ -1192,16 +1299,33 @@ int run_test_with_multi_import_diff_ctx(
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (iter == 0)
if (use_fence)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
}
if (use_fence)
{
fence->wait();
}
else
{
clVk2CLExternalSemaphore->wait(cmd_queue1);
}
clVk2CLExternalSemaphore->wait(cmd_queue1);
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
@@ -1235,7 +1359,11 @@ int run_test_with_multi_import_diff_ctx(
goto CLEANUP;
}
}
if (iter != (maxIter - 1))
if (use_fence)
{
clFinish(cmd_queue1);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
@@ -1243,16 +1371,33 @@ int run_test_with_multi_import_diff_ctx(
clFinish(cmd_queue1);
for (uint32_t iter = 0; iter < maxIter; iter++)
{
if (iter == 0)
if (use_fence)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
fence->reset();
vkQueue.submit(vkCommandBuffer, fence);
fence->wait();
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
if (iter == 0)
{
vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
}
else
{
vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
vkVk2CLSemaphore);
}
}
if (use_fence)
{
fence->wait();
}
else
{
clVk2CLExternalSemaphore2->wait(cmd_queue2);
}
clVk2CLExternalSemaphore2->wait(cmd_queue2);
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
@@ -1286,7 +1431,11 @@ int run_test_with_multi_import_diff_ctx(
goto CLEANUP;
}
}
if (iter != (maxIter - 1))
if (use_fence)
{
clFinish(cmd_queue2);
}
else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore2->signal(cmd_queue2);
}
@@ -1474,10 +1623,15 @@ CLEANUP:
}
}
}
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
if (!use_fence)
{
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
}
if (error_3) free(error_3);
if (error_1) clReleaseMemObject(error_1);
if (error_2) clReleaseMemObject(error_2);
@@ -1485,7 +1639,8 @@ CLEANUP:
}
int test_buffer_common(cl_device_id device_, cl_context context_,
cl_command_queue queue_, int numElements_)
cl_command_queue queue_, int numElements_,
float use_fence)
{
int current_device = 0;
@@ -1738,26 +1893,26 @@ int test_buffer_common(cl_device_id device_, cl_context context_,
{
errNum = run_test_with_multi_import_same_ctx(
context, cmd_queue1, kernel, verify_kernel, vkDevice,
numBuffers, bufferSize, bufferSizeForOffset);
numBuffers, bufferSize, bufferSizeForOffset, use_fence);
}
else if (multiImport && multiCtx)
{
errNum = run_test_with_multi_import_diff_ctx(
context, context2, cmd_queue1, cmd_queue3, kernel, kernel2,
verify_kernel, verify_kernel2, vkDevice, numBuffers,
bufferSize, bufferSizeForOffset);
bufferSize, bufferSizeForOffset, use_fence);
}
else if (numCQ == 2)
{
errNum = run_test_with_two_queue(
context, cmd_queue1, cmd_queue2, kernel, verify_kernel,
vkDevice, numBuffers + 1, bufferSize);
vkDevice, numBuffers + 1, bufferSize, use_fence);
}
else
{
errNum = run_test_with_one_queue(context, cmd_queue1, kernel,
verify_kernel, vkDevice,
numBuffers, bufferSize);
errNum = run_test_with_one_queue(
context, cmd_queue1, kernel, verify_kernel, vkDevice,
numBuffers, bufferSize, use_fence);
}
if (errNum != CL_SUCCESS)
{