Qualcomm DMA-BUF enables zero-copy memory sharing between the CPU and the GPU. In general, DMA-BUF is also faster than traditional memory allocation, which saves time and improves program performance.
Prerequisites
To use DMA-BUF, the following conditions should be met:
- NDK r23b+ SDK
- Include the following header files:
#include <linux/dma-buf.h>
#include <BufferAllocator/BufferAllocator.h>
#include <BufferAllocator/BufferAllocatorWrapper.h>
#include <cl_ext_qcom.h>
- Link your program against the shared library libdmabufheap.so, which can be found on phones with an SM8450 or newer chipset.
Allocate memory from DMA-BUF
#include <linux/dma-buf.h>
#include <BufferAllocator/BufferAllocator.h>
#include <BufferAllocator/BufferAllocatorWrapper.h>
BufferAllocator *buffer_allocator = nullptr;
/* Create Buffer Allocator */
buffer_allocator = CreateDmabufHeapBufferAllocator(); //from libdmabufheap.so
if(buffer_allocator == nullptr)
{
std::cerr << "Error with CreateDmabufHeapBufferAllocator()\n";
errcode = EXIT_FAILURE;
goto cleanup;
}
int fd = DmabufHeapAlloc(allocator, "qcom,system", size, 0, 0);
if(fd < 0)
{
std::cerr << "Error alocating dmabuf memory\n";
std::exit(EXIT_FAILURE);
}
void *host_addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (MAP_FAILED == host_addr)
{
close(fd);
std::cerr << "Error " << errno << " mmapping fd to pointer: " << strerror(errno) << "\n";
std::exit(errno);
}
DMA-BUF for OpenCL
CLImage
#include "cl_ext_qcom.h"
// Image format and descriptor start out zero-initialized; fill them in for
// the actual image being created.
cl_image_format img_format = {};
cl_image_desc img_desc = {};

// Descriptor that hands the DMA-BUF (file descriptor + CPU mapping) to OpenCL.
cl_mem_dmabuf_host_ptr img_dmabuf = {};
img_dmabuf.dmabuf_hostptr = host_addr;
img_dmabuf.dmabuf_filedesc = fd;
img_dmabuf.ext_host_ptr.host_cache_policy = CL_MEM_HOST_IOCOHERENT_QCOM;
img_dmabuf.ext_host_ptr.allocation_type = CL_MEM_DMABUF_HOST_PTR_QCOM;

// The host-pointer argument is the descriptor above rather than a raw buffer
// address. The status is written to errcode; check it before using `image`.
cl_mem image = clCreateImage(context,
                             CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
                             &img_format,
                             &img_desc,
                             &img_dmabuf,
                             &errcode);