builtin-programs/gpu/textures.folk
When the GPU Vulkan handle type definer is /defineVulkanHandleType/ &\
the GPU library is /gpuLib/ &\
the GPU VMA DLL is /vmaDll/ &\
the image library is /imageLib/ {
# Bring the handle-type definer matched by the enclosing When into scope.
# It is invoked further down as a command taking the C unit and a Vulkan
# type name. NOTE(review): exact semantics of fn are defined outside this
# file -- confirm.
fn defineVulkanHandleType
# C compilation unit for the texture manager. Everything registered on it
# below is compiled by the compile call near the end of this program.
set gpuc [C]
$gpuc cflags -I./vendor
$gpuc endcflags $vmaDll
$gpuc code {
#define VOLK_IMPLEMENTATION
#include "volk/volk.h"
#include "vk_mem_alloc.h"
// vmaInit and vmaGetAllocator are only declared here, not defined in this
// file. Presumably they are provided by the VMA DLL passed to endcflags
// above -- TODO confirm.
void vmaInit(VkInstance instance, VkPhysicalDevice physicalDevice, VkDevice device,
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr,
PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr,
PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties,
PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties);
VmaAllocator vmaGetAllocator();
}
$gpuc include <pthread.h>
$gpuc include <stdlib.h>
$gpuc include <string.h>
$gpuc include <stdatomic.h>
# Pull in the GPU and image libraries. Code below uses names that are not
# defined in this file, such as getMaxTextures, device_ptr and Image.
# NOTE(review): presumably those come from these two libraries -- confirm.
$gpuc extend $gpuLib
$gpuc extend $imageLib
# NOTE(review): pthread.h is already included a few lines above.
$gpuc include <pthread.h>
# Marshal VmaAllocation handles across the Tcl/C boundary as strings of
# the form "(VmaAllocation) <pointer>".
$gpuc typedef {struct VmaAllocation_T*} VmaAllocation
$gpuc argtype VmaAllocation {
    // Start from NULL so that a string which does not match the expected
    // format yields a null handle instead of an uninitialized pointer.
    VmaAllocation $argname = NULL;
    sscanf(Jim_String($obj), "(VmaAllocation) %p", &$argname);
}
$gpuc rtype VmaAllocation {
    char buf[100];
    snprintf(buf, sizeof(buf), "(VmaAllocation) %p", $rvalue);
    $robj = Jim_NewStringObj(interp, buf, -1);
}
# Build a C snippet that evaluates the Vulkan call whose text is given in
# the call argument. If the result is not VK_SUCCESS the snippet prints
# the call text, the result name from VkResultToString and the numeric
# code to stderr, then exits with status 1.
# The generated text is passed through string map before being returned.
# NOTE(review): presumably this collapses the snippet onto one line so it
# can be spliced into a proc body -- confirm how the map key is read.
local proc vktry {call} { string map {\n " "} [csubst {{
VkResult res = $call;
if (res != VK_SUCCESS) {
fprintf(stderr, "Failed $call: %s (%d)\n",
VkResultToString(res), res); exit(1);
}
}}] }
# Texture management:
# The technique used to manage textures here is to have a
# single giant descriptor set for a giant GPU-side array of
# textures, which all shaders can access. (That descriptor set
# _never_ has to be rebound; it stays bound through all draw
# calls, forever.)
#
# Each texture has to be 'copied to the GPU' before you do any
# draw calls that use it. Copying a texture to the GPU gives
# you a GPU-side texture handle, which is just an integer index
# into the GPU-side array. You can pass that texture handle
# into draw calls as a parameter (push constant) when you
# want to draw/use the texture.
#
# See:
# - http://kylehalladay.com/blog/tutorial/vulkan/2018/01/28/Textue-Arrays-Vulkan.html
# - https://chunkstories.xyz/blog/a-note-on-descriptor-indexing/
# - https://gist.github.com/DethRaid/0171f3cfcce51950ee4ef96c64f59617
# - http://roar11.com/2019/06/vulkan-textures-unbound/
# The single descriptor set layout and descriptor set shared by all
# textures. Code below reaches them through textureDescriptorSetLayout_ptr
# and textureDescriptorSet_ptr. NOTE(review): presumably the define
# command generates those accessors -- confirm.
$gpuc define {
VkDescriptorSetLayout textureDescriptorSetLayout;
VkDescriptorSet textureDescriptorSet;
}
defineVulkanHandleType $gpuc VkDescriptorSetLayout
defineVulkanHandleType $gpuc VkDescriptorSet
$gpuc code {
// Logical device, cached by textureManagerInit from device_ptr.
VkDevice device;
// Forward declaration: textureManagerInit calls this before the proc of
// the same name is defined further down.
static void initPlaceholderTexture();
}
# A texture handle is an integer index into the gpuTextures array and
# into the descriptor array at binding 0.
$gpuc typedef int GpuTextureHandle
defineVulkanHandleType $gpuc VkImage
defineVulkanHandleType $gpuc VkDeviceMemory
defineVulkanHandleType $gpuc VkImageView
defineVulkanHandleType $gpuc VkSampler
# Per-slot bookkeeping for one texture. One block exists per descriptor
# slot in the gpuTextures array.
#   alive             set by allocateGpuTextureHandle when the slot is
#                     claimed, cleared by destroyGpuTextureResources
#   retiring          set by retireGpuTexture once a free was requested
#   width, height     dimensions passed to createGpuTexture
#   retireAfterFrame  value of textureFrameEpoch from which the retired
#                     texture may be destroyed
#   handle            index of this block in gpuTextures
#   textureImage, textureImageAllocation, textureImageView, textureSampler
#                     GPU objects created in createGpuTexture and released
#                     in destroyGpuTextureResources
#   description       malloc-allocated label written by createGpuTexture
#                     and freed in destroyGpuTextureResources
$gpuc struct GpuTextureBlock {
bool _Atomic alive;
bool _Atomic retiring;
int width;
int height;
int retireAfterFrame;
GpuTextureHandle handle;
VkImage textureImage;
VmaAllocation textureImageAllocation;
VkImageView textureImageView;
VkSampler textureSampler;
// mostly for debugging:
char* description;
}
$gpuc code {
// Array of GpuTextureBlocks. Each element points to all GPU-side
// data structures associated with a particular texture (that we
// will destroy when we evict that texture).
// Allocated in textureManagerInit with getMaxTextures entries.
struct GpuTextureBlock* gpuTextures;
// Deferred descriptor-set updates and resource destruction,
// drained once per frame on the GPU thread when the GPU is idle.
// DEFERRED_ADD asks for a descriptor write for the handle,
// DEFERRED_FREE asks for the handle to be retired.
enum DeferredTextureOp { DEFERRED_ADD, DEFERRED_FREE };
struct DeferredTextureEntry {
enum DeferredTextureOp op;
GpuTextureHandle handle;
};
// Fixed capacity of the deferred queue. enqueueDeferredTextureOp exits
// the process when the queue is already full.
#define DEFERRED_QUEUE_CAP 256
struct DeferredTextureEntry deferredQueue[DEFERRED_QUEUE_CAP];
// Number of valid entries at the front of deferredQueue.
int _Atomic deferredQueueCount = 0;
// Held while deferredQueue and deferredQueueCount are read or written.
pthread_mutex_t deferredQueueMutex = PTHREAD_MUTEX_INITIALIZER;
// How many frame epochs a retired texture is kept before its GPU
// resources are destroyed.
#define TEXTURE_RETIRE_GRACE_FRAMES 2
// Frame counter, incremented by beginTextureFrame.
int textureFrameEpoch = 0;
}
# Initialize the texture manager: load Vulkan entry points through volk,
# allocate the texture slot table, build the single global texture
# descriptor set, start the VMA allocator and install the placeholder
# texture. Any failing Vulkan call terminates the process via vktry.
$gpuc proc textureManagerInit {} void {
    // Load Vulkan entry points through volk before any vk* call below.
    $[vktry volkInitialize()]
    volkLoadInstanceOnly(*instance_ptr());
    device = *device_ptr();
    volkLoadDevice(device);

    // One zero-initialized block per descriptor slot. A zeroed block has
    // alive == false, which marks the slot as free.
    const int maxTextures = getMaxTextures();
    FOLK_ENSURE(maxTextures > 0);
    gpuTextures = calloc(maxTextures, sizeof(GpuTextureBlock));
    FOLK_ENSURE(gpuTextures != NULL);

    // Set up textureDescriptorSetLayout:
    {
        /* VkDescriptorBindingFlags flags[1]; */
        /* flags[0] = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; */
        /* flags[0] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT; */
        /* VkDescriptorSetLayoutBindingFlagsCreateInfo bindingFlags = {0}; */
        /* bindingFlags.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; */
        /* bindingFlags.bindingCount = 1; */
        /* bindingFlags.pBindingFlags = flags; */

        VkDescriptorSetLayoutBinding bindings[1];
        memset(bindings, 0, sizeof(bindings));
        bindings[0].binding = 0;
        bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
        bindings[0].descriptorCount = maxTextures;
        bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;

        VkDescriptorSetLayoutCreateInfo createInfo = {0};
        createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        createInfo.bindingCount = 1;
        createInfo.pBindings = bindings;
        /* createInfo.pNext = &bindingFlags; */

        // Checked like every other creation call in this proc, so a failure
        // is reported here and not at the later descriptor set allocation.
        $[vktry {vkCreateDescriptorSetLayout(device, &createInfo, NULL, textureDescriptorSetLayout_ptr())}]
    }

    VkDescriptorPool descriptorPool; {
        // The pool must hold as many combined image samplers as the single
        // binding in the layout above declares, otherwise allocating the
        // set from it cannot succeed.
        VkDescriptorPoolSize poolSize = {0};
        poolSize.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
        poolSize.descriptorCount = maxTextures;

        VkDescriptorPoolCreateInfo poolInfo = {0};
        poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
        poolInfo.poolSizeCount = 1;
        poolInfo.pPoolSizes = &poolSize;
        poolInfo.maxSets = 100;
        $[vktry {vkCreateDescriptorPool(device, &poolInfo, NULL, &descriptorPool)}]
    }

    // Set up textureDescriptorSet:
    {
        VkDescriptorSetAllocateInfo allocInfo = {0};
        allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
        allocInfo.descriptorPool = descriptorPool;
        allocInfo.descriptorSetCount = 1;
        allocInfo.pSetLayouts = textureDescriptorSetLayout_ptr();
        $[vktry {vkAllocateDescriptorSets(device, &allocInfo, textureDescriptorSet_ptr())}]
    }

    // Initialize VMA allocator
    vmaInit(*instance_ptr(), *physicalDevice_ptr(), device,
            vkGetInstanceProcAddr,
            vkGetDeviceProcAddr,
            vkGetPhysicalDeviceProperties,
            vkGetPhysicalDeviceMemoryProperties);

    // Needs the allocator and the descriptor set, so it runs last.
    initPlaceholderTexture();
}
# Buffer allocation:
$gpuc code [csubst {
    // Return the index of the first memory type of the physical device
    // that is allowed by the typeFilter bit mask and has every flag in
    // properties set. Exits the process with status 1 if there is none.
    uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties) {
        VkPhysicalDeviceMemoryProperties memProperties;
        vkGetPhysicalDeviceMemoryProperties(*physicalDevice_ptr(), &memProperties);
        for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
            // Unsigned literal: shifting a signed 1 into bit 31 is undefined.
            if ((typeFilter & (1u << i)) == 0) {
                continue;
            }
            if ((memProperties.memoryTypes[i].propertyFlags & properties) == properties) {
                return i;
            }
        }
        fprintf(stderr, "gpu/textures: No suitable memory type (filter 0x%x, properties 0x%x)\n",
                (unsigned) typeFilter, (unsigned) properties);
        exit(1);
    }
}]
$gpuc typedef size_t VkDeviceSize false
$gpuc typedef uint32_t VkBufferUsageFlags false
$gpuc typedef uint32_t VkMemoryPropertyFlags false
# Create a Vulkan buffer of the given size and usage through VMA and
# store the buffer and its allocation in the two out-parameters.
# Only the DEVICE_LOCAL and HOST_VISIBLE bits of properties are looked
# at. Exits the process with status 1 if VMA reports a failure.
$gpuc proc createBuffer {VkDeviceSize size VkBufferUsageFlags usage VkMemoryPropertyFlags properties
                         VkBuffer* buffer VmaAllocation* allocation} void {
    // Translate the requested Vulkan memory properties into VMA terms.
    VmaAllocationCreateInfo vmaRequest = { .usage = VMA_MEMORY_USAGE_AUTO };
    if ((properties & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
        vmaRequest.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    }
    if ((properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) {
        vmaRequest.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
    }

    VkBufferCreateInfo bufferRequest = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = size,
        .usage = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };

    // NOTE(review): the format string ends in an escaped backslash followed
    // by n, unlike other messages in this file -- confirm a newline is meant.
    const VkResult status = vmaCreateBuffer(vmaGetAllocator(), &bufferRequest, &vmaRequest, buffer, allocation, NULL);
    if (status != VK_SUCCESS) {
        fprintf(stderr, "Failed to create buffer with VMA: %d\\n", status);
        exit(1);
    }

#ifdef TRACY_ENABLE
    // Report the allocation and its size to the Tracy profiler.
    VmaAllocationInfo allocationDetails;
    vmaGetAllocationInfo(vmaGetAllocator(), *allocation, &allocationDetails);
    TracyCAlloc(*allocation, allocationDetails.size);
#endif
}
# Texture allocation:
$gpuc code [csubst {
    // Create a single-mip, single-layer 2D image through VMA and store
    // the image and its allocation in the two out-parameters.
    // Only the DEVICE_LOCAL and HOST_VISIBLE bits of properties are looked
    // at. Exits the process with status 1 if VMA reports a failure.
    void createImage(uint32_t width, uint32_t height,
                     VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage, VkMemoryPropertyFlags properties,
                     VkImage* image, VmaAllocation* allocation) {
        VmaAllocationCreateInfo vmaRequest = { .usage = VMA_MEMORY_USAGE_AUTO };
        if ((properties & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
            vmaRequest.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        }
        if ((properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) {
            vmaRequest.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
        }

        VkImageCreateInfo imageRequest = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .imageType = VK_IMAGE_TYPE_2D,
            .format = format,
            .extent = { .width = width, .height = height, .depth = 1 },
            .mipLevels = 1,
            .arrayLayers = 1,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .tiling = tiling,
            .usage = usage,
            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
            // TODO: this means it can't be drawn right away (validation error).
            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        };

        // NOTE(review): the format string ends in an escaped backslash
        // followed by n, unlike other messages in this file -- confirm a
        // newline is meant.
        const VkResult status = vmaCreateImage(vmaGetAllocator(), &imageRequest, &vmaRequest, image, allocation, NULL);
        if (status != VK_SUCCESS) {
            fprintf(stderr, "Failed to create image with VMA: %d\\n", status);
            exit(1);
        }

#ifdef TRACY_ENABLE
        // Report the allocation and its size to the Tracy profiler.
        VmaAllocationInfo allocationDetails;
        vmaGetAllocationInfo(vmaGetAllocator(), *allocation, &allocationDetails);
        TracyCAlloc(*allocation, allocationDetails.size);
#endif
    }
}]
defineVulkanHandleType $gpuc VkCommandBuffer
defineVulkanHandleType $gpuc VkFence
# Fetch the command buffer returned by getCommandBuffer and begin
# recording into it with the one-time-submit usage flag. Returns that
# command buffer.
$gpuc proc beginSingleTimeCommands {} VkCommandBuffer {
    const VkCommandBufferBeginInfo oneShot = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
    VkCommandBuffer recording = getCommandBuffer();
    vkBeginCommandBuffer(recording, &oneShot);
    return recording;
}
# Finish recording commandBuffer and submit it to the graphics queue,
# signalling fence on completion. The submission is made while holding
# the graphics queue mutex. Does not wait for the fence.
$gpuc proc endSingleTimeCommands {VkCommandBuffer commandBuffer VkFence fence} void {
    vkEndCommandBuffer(commandBuffer);

    const VkSubmitInfo submission = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount = 1,
        .pCommandBuffers = &commandBuffer,
    };

    pthread_mutex_lock(graphicsQueueMutex_ptr());
    vkQueueSubmit(*graphicsQueue_ptr(), 1, &submission, fence);
    pthread_mutex_unlock(graphicsQueueMutex_ptr());
}
$gpuc code {
    // One fence per thread, created on first use by getFence.
    static __thread VkFence _fence = VK_NULL_HANDLE;
}
# Return the calling thread's fence in the unsignaled state: an existing
# fence is reset, otherwise a new one is created first.
$gpuc proc getFence {} VkFence {
    if (_fence != VK_NULL_HANDLE) {
        vkResetFences(device, 1, &_fence);
        return _fence;
    }

    VkFenceCreateInfo fenceInfo = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
    $[vktry {vkCreateFence(device, &fenceInfo, NULL, &_fence)}]
    return _fence;
}
$gpuc typedef int VkFormat false
$gpuc typedef int VkImageLayout false
# Record and submit an image memory barrier that moves the color aspect
# of image (mip 0, layer 0) from oldLayout to newLayout, then block until
# the submission has finished. Only three transitions are supported, see
# the table in the body. Any other pair exits the process with status 91.
# The format argument is not used by the body.
$gpuc proc transitionImageLayout {VkImage image VkFormat format
                                  VkImageLayout oldLayout VkImageLayout newLayout} void {
    VkFence fence = getFence();
    VkCommandBuffer commandBuffer = beginSingleTimeCommands();

    // Supported transitions with their access masks and pipeline stages.
    const struct {
        VkImageLayout from;
        VkImageLayout to;
        VkAccessFlags srcAccess;
        VkAccessFlags dstAccess;
        VkPipelineStageFlags srcStage;
        VkPipelineStageFlags dstStage;
    } supported[] = {
        { VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
          0, VK_ACCESS_TRANSFER_WRITE_BIT,
          VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT },
        { VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
          VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
          VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT },
        { VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
          0, VK_ACCESS_SHADER_READ_BIT,
          VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT },
    };
    const int supportedCount = (int) (sizeof(supported) / sizeof(supported[0]));

    int match = -1;
    for (int i = 0; i < supportedCount; i++) {
        if (supported[i].from == oldLayout && supported[i].to == newLayout) {
            match = i;
            break;
        }
    }
    if (match < 0) {
        exit(91);
    }

    const VkImageMemoryBarrier barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .srcAccessMask = supported[match].srcAccess,
        .dstAccessMask = supported[match].dstAccess,
        .oldLayout = oldLayout,
        .newLayout = newLayout,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };

    vkCmdPipelineBarrier(commandBuffer,
                         supported[match].srcStage, supported[match].dstStage,
                         0,
                         0, NULL,
                         0, NULL,
                         1, &barrier);
    endSingleTimeCommands(commandBuffer, fence);

    // HACK: this wait is so that the commandBuffer is usable afterward.
    vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
}
# Expand im into the 4-channel image ret, which must have the same width
# and height. A 4-channel source is copied as is, a 3-channel source gets
# alpha 255 appended, and a 1-channel source is replicated into the three
# color channels with alpha 255. Any other channel count fails the
# FOLK_ENSURE check.
$gpuc proc copyImageToRgba {Image im Image ret} void {
    FOLK_ENSURE(im.width == ret.width && im.height == ret.height);

    if (im.components == 4) {
        if (ret.bytesPerRow != im.bytesPerRow) {
            // Strides differ, so copy one row of pixels at a time.
            for (int row = 0; row < im.height; row++) {
                memcpy(ret.data + row*ret.bytesPerRow,
                       im.data + row*im.bytesPerRow,
                       im.width*4);
            }
        } else {
            // Same stride on both sides: one bulk copy covers every row.
            memcpy(ret.data, im.data, ret.bytesPerRow * ret.height);
        }
        return;
    }

    FOLK_ENSURE(im.components == 1 || im.components == 3);

    // Offsets of green and blue relative to the first source byte of a
    // pixel. A 1-channel source reuses its only byte for all three.
    const int greenOffset = (im.components == 3) ? 1 : 0;
    const int blueOffset = (im.components == 3) ? 2 : 0;

    for (int row = 0; row < im.height; row++) {
        for (int col = 0; col < im.width; col++) {
            const int from = row*im.bytesPerRow + col*im.components;
            const int to = row*ret.bytesPerRow + col*ret.components;
            ret.data[to+0] = im.data[from];
            ret.data[to+1] = im.data[from + greenOffset];
            ret.data[to+2] = im.data[from + blueOffset];
            ret.data[to+3] = 255;
        }
    }
}
$gpuc code [csubst {
    // Claim a free slot in gpuTextures and return its index. A slot is
    // claimed by atomically flipping its alive flag from false to true, so
    // concurrent callers never receive the same slot.
    // If every slot is taken, the current occupants are listed on stderr
    // and the scan is retried after a 5 ms sleep. This function does not
    // return until a slot becomes free.
    GpuTextureHandle allocateGpuTextureHandle() {
        const int maxTextures = getMaxTextures();
        for (;;) {
            for (int i = 0; i < maxTextures; i++) {
                // The strong variant is used because the weak one may fail
                // spuriously, which would skip a free slot and could print
                // the exhaustion report below although slots are available.
                bool notAlive = false;
                if (atomic_compare_exchange_strong(&gpuTextures[i].alive, &notAlive, true)) {
                    gpuTextures[i].handle = i;
                    return i;
                }
            }

            fprintf(stderr, "gpu/textures: Exceeded GPU max textures (%d):\n", maxTextures);
            for (int i = 0; i < maxTextures; i++) {
                // A slot can be alive before createGpuTexture has filled in
                // its description, so guard against a null string.
                const char* description = gpuTextures[i].alive ? gpuTextures[i].description : "<not alive>";
                fprintf(stderr, " %d: %s\n", i, description != NULL ? description : "<no description>");
            }

            struct timespec ts = {0, 5000000};
            nanosleep(&ts, NULL);
        }
    }
}]
$gpuc code {
    // Append one operation to the deferred queue while holding
    // deferredQueueMutex. Exits the process with status 1 if the queue
    // already holds DEFERRED_QUEUE_CAP entries.
    static void enqueueDeferredTextureOp(enum DeferredTextureOp op, GpuTextureHandle handle) {
        pthread_mutex_lock(&deferredQueueMutex);
        const int tail = deferredQueueCount;
        if (tail >= DEFERRED_QUEUE_CAP) {
            fprintf(stderr, "gpu/textures: Deferred queue full (%d)\n", DEFERRED_QUEUE_CAP);
            exit(1);
        }
        deferredQueue[tail].op = op;
        deferredQueue[tail].handle = handle;
        deferredQueueCount = tail + 1;
        pthread_mutex_unlock(&deferredQueueMutex);
    }

    // Write a single texture slot into the descriptor set. Must only
    // be called on the GPU thread between frames (or during init).
    static void writeTextureDescriptor(GpuTextureHandle textureId) {
        const GpuTextureBlock* source = &gpuTextures[textureId];
        const VkDescriptorImageInfo sampled = {
            .sampler = source->textureSampler,
            .imageView = source->textureImageView,
            .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        };
        const VkWriteDescriptorSet update = {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = *textureDescriptorSet_ptr(),
            .dstBinding = 0,
            .dstArrayElement = textureId,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .pImageInfo = &sampled,
        };
        vkUpdateDescriptorSets(device, 1, &update, 0, NULL);
    }
}
# Point every slot of the texture descriptor array at the image view and
# sampler of the texture firstTextureId.
$gpuc proc initializeDescriptorSet {GpuTextureHandle firstTextureId} void {
    // Hack: if we're not using the descriptor indexing extension,
    // we can't have a partially bound descriptor set, so we need
    // to fill all the slots in the texture array with _something_.
    // We just fill all slots with the first texture for now. See
    // http://roar11.com/2019/06/vulkan-textures-unbound/
    const VkDescriptorImageInfo filler = {
        .sampler = gpuTextures[firstTextureId].textureSampler,
        .imageView = gpuTextures[firstTextureId].textureImageView,
        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
    };

    // One write per slot, all sharing the same image info.
    VkWriteDescriptorSet descriptorWrites[getMaxTextures()];
    for (int slot = 0; slot < getMaxTextures(); slot++) {
        descriptorWrites[slot] = (VkWriteDescriptorSet) {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = *textureDescriptorSet_ptr(),
            .dstBinding = 0,
            .dstArrayElement = slot,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .pImageInfo = &filler,
        };
    }
    vkUpdateDescriptorSets(device, getMaxTextures(), descriptorWrites, 0, NULL);
}
# Request that the descriptor slot for textureId be written. The write
# itself is not done here. It is queued as a DEFERRED_ADD entry and
# performed by drainDeferredTextureOps.
$gpuc proc addToTextureDescriptorSet {GpuTextureHandle textureId} void {
// Queue only. The descriptor set is not touched by this call.
enqueueDeferredTextureOp(DEFERRED_ADD, textureId);
}
# Return a pointer to the bookkeeping block stored at index handle in the
# gpuTextures array. The index is not range-checked and the alive flag of
# the block is not inspected.
$gpuc proc getGpuTexture {GpuTextureHandle handle} GpuTextureBlock* {
return &gpuTextures[handle];
}
# Claim a texture slot and create its image, image view and sampler for
# a width x height texture of the given Vulkan format. Returns the
# filled-in block for that slot.
# NOTE: this does not touch the descriptor set. The caller must call
# addToTextureDescriptorSet afterwards to actually use the texture.
$gpuc proc createGpuTexture {int width int height int format} GpuTextureBlock* {
    const GpuTextureHandle slot = allocateGpuTextureHandle();
    GpuTextureBlock* block = &gpuTextures[slot];

    // Reset the bookkeeping left over from a previous user of this slot.
    block->retireAfterFrame = 0;
    block->retiring = false;
    block->width = width;
    block->height = height;

    const VkImageUsageFlags imageUsage =
        VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
        VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    createImage(width, height,
                (VkFormat) format, VK_IMAGE_TILING_OPTIMAL,
                imageUsage,
                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                &block->textureImage, &block->textureImageAllocation);

    // View over the single color mip level and array layer of the image.
    VkImageViewCreateInfo viewInfo = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .image = block->textureImage,
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .format = format,
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    $[vktry {vkCreateImageView(device, &viewInfo, NULL, &block->textureImageView)}]

    // Linear-filtering, repeating sampler without anisotropy or compare.
    VkSamplerCreateInfo samplerInfo = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter = VK_FILTER_LINEAR,
        .minFilter = VK_FILTER_LINEAR,
        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .mipLodBias = 0.0f,
        .anisotropyEnable = VK_FALSE, // TODO: do we want this?
        .compareEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_ALWAYS,
        .minLod = 0.0f,
        .maxLod = 0.0f,
        .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
        .unnormalizedCoordinates = VK_FALSE,
    };
    $[vktry {vkCreateSampler(device, &samplerInfo, NULL, &block->textureSampler)}]

    // Human-readable label, printed when the slot table is dumped.
    block->description = malloc(32);
    snprintf(block->description, 32, "%dx%d texture", width, height);
    return block;
}
# Per-worker reusable texture upload slots. Staging buffers stay alive
# until their slot is reused and the previous upload fence has signaled.
$gpuc code {
#define INFLIGHT_UPLOADS 8
    // State of one upload slot. inUse is set by copyImageToGpuTexture
    // after it has submitted an upload that uses the slot.
    struct InflightUpload {
        VkCommandBuffer cmdBuffer;
        VkFence fence;
        VkBuffer stagingBuffer;
        VmaAllocation stagingBufferAllocation;
        bool inUse;
    };
    // Thread-local ring of slots and the index of the next one to hand out.
    static __thread struct InflightUpload _inflightUploads[INFLIGHT_UPLOADS];
    static __thread int _inflightUploadsNext = 0;

    // Reclaim a slot: if it holds an outstanding upload, wait for its
    // fence, destroy its staging buffer, reset fence. Returns the slot
    // with fence + cmdBuffer allocated and ready to use.
    // Exits the process with status 1 if the fence or the command buffer
    // cannot be created, because the caller would otherwise submit and
    // later wait on null handles.
    static struct InflightUpload* acquireInflightUpload() {
        struct InflightUpload* slot = &_inflightUploads[_inflightUploadsNext];
        _inflightUploadsNext = (_inflightUploadsNext + 1) % INFLIGHT_UPLOADS;

        if (slot->inUse) {
#ifdef TRACY_ENABLE
            TracyCZoneN(ctx, "vkWaitForFences (ring full)", 1);
#endif
            // The staging buffer may only be destroyed once the GPU has
            // finished the copy that reads from it.
            vkWaitForFences(device, 1, &slot->fence, VK_TRUE, UINT64_MAX);
#ifdef TRACY_ENABLE
            TracyCZoneEnd(ctx);
            TracyCFree(slot->stagingBufferAllocation);
#endif
            vmaDestroyBuffer(vmaGetAllocator(),
                             slot->stagingBuffer, slot->stagingBufferAllocation);
            slot->stagingBuffer = VK_NULL_HANDLE;
            slot->stagingBufferAllocation = NULL;
            slot->inUse = false;
        }

        if (slot->fence == VK_NULL_HANDLE) {
            VkFenceCreateInfo fenceInfo = {0};
            fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
            const VkResult res = vkCreateFence(device, &fenceInfo, NULL, &slot->fence);
            if (res != VK_SUCCESS) {
                fprintf(stderr, "gpu/textures: Failed to create upload fence: %d\n", res);
                exit(1);
            }
        } else {
            vkResetFences(device, 1, &slot->fence);
        }

        if (slot->cmdBuffer == VK_NULL_HANDLE) {
            VkCommandBufferAllocateInfo allocInfo = {0};
            allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
            allocInfo.commandPool = getCommandPool();
            allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
            allocInfo.commandBufferCount = 1;
            const VkResult res = vkAllocateCommandBuffers(device, &allocInfo, &slot->cmdBuffer);
            if (res != VK_SUCCESS) {
                fprintf(stderr, "gpu/textures: Failed to allocate upload command buffer: %d\n", res);
                exit(1);
            }
        } else {
            vkResetCommandBuffer(slot->cmdBuffer, 0);
        }
        return slot;
    }
}
# Upload im to a newly created GPU texture and return its handle.
# The pixels are converted to RGBA in a host-visible staging buffer, a
# device-local texture is created, and a command buffer that copies the
# staging buffer into the texture and leaves it in the shader-read layout
# is submitted to the graphics queue. The call does not wait for the GPU.
# The descriptor write for the new handle is queued, not performed here.
$gpuc proc copyImageToGpuTexture {Image im} GpuTextureHandle {
    // Validate before taking an upload slot, and compute the byte size in
    // size_t so that large images cannot overflow int arithmetic.
    FOLK_ENSURE(im.width > 0 && im.height > 0);
    const size_t size = (size_t) im.width * (size_t) im.height * 4;

    struct InflightUpload* upload = acquireInflightUpload();

    createBuffer(size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                 &upload->stagingBuffer, &upload->stagingBufferAllocation);

    // Copy im to stagingBuffer:
    {
        void* data = NULL;
        $[vktry {vmaMapMemory(vmaGetAllocator(), upload->stagingBufferAllocation, &data)}]
        Image stagingIm = (Image) {
            .width = im.width, .height = im.height,
            .components = 4,
            .bytesPerRow = im.width * 4,
            .data = data
        };
        copyImageToRgba(im, stagingIm);
        vmaUnmapMemory(vmaGetAllocator(), upload->stagingBufferAllocation);
    }

    // Allocate a texture and texture block:
    GpuTextureBlock* block = createGpuTexture(im.width, im.height, VK_FORMAT_R8G8B8A8_SRGB);

    // Record + submit staging buffer -> image copy. We do NOT wait on
    // the fence here; a later call to acquireInflightUpload will
    // reclaim this slot's staging buffer once the GPU is done.
    {
        VkCommandBuffer commandBuffer = upload->cmdBuffer;

        VkCommandBufferBeginInfo beginInfo = {0};
        beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
        beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
        vkBeginCommandBuffer(commandBuffer, &beginInfo);

        // Transition to transfer destination
        VkImageMemoryBarrier barrier = {0};
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = block->textureImage;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        barrier.subresourceRange.baseMipLevel = 0;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.baseArrayLayer = 0;
        barrier.subresourceRange.layerCount = 1;
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        vkCmdPipelineBarrier(commandBuffer,
                             VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             0, 0, NULL, 0, NULL, 1, &barrier);

        // Copy buffer to image. Row length and image height of zero mean
        // the buffer is tightly packed according to imageExtent.
        VkBufferImageCopy region = {0};
        region.bufferOffset = 0;
        region.bufferRowLength = 0;
        region.bufferImageHeight = 0;
        region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        region.imageSubresource.mipLevel = 0;
        region.imageSubresource.baseArrayLayer = 0;
        region.imageSubresource.layerCount = 1;
        region.imageOffset = (VkOffset3D) {0, 0, 0};
        region.imageExtent = (VkExtent3D) {im.width, im.height, 1};
        vkCmdCopyBufferToImage(commandBuffer,
                               upload->stagingBuffer,
                               block->textureImage,
                               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                               1,
                               &region);

        // Transition to shader read-only
        barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        vkCmdPipelineBarrier(commandBuffer,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                             0, 0, NULL, 0, NULL, 1, &barrier);

        vkEndCommandBuffer(commandBuffer);

        VkSubmitInfo submitInfo = {0};
        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
        submitInfo.commandBufferCount = 1;
        submitInfo.pCommandBuffers = &commandBuffer;

        pthread_mutex_lock(graphicsQueueMutex_ptr());
        vkQueueSubmit(*graphicsQueue_ptr(), 1, &submitInfo, upload->fence);
        pthread_mutex_unlock(graphicsQueueMutex_ptr());
    }

    // From here on the slot owns a staging buffer that must outlive the
    // submitted copy. acquireInflightUpload releases it later.
    upload->inUse = true;

    addToTextureDescriptorSet(block->handle);
    return block->handle;
}
# Make descriptor slot oldHandle refer to the image view and sampler of
# texture newHandle. The write happens immediately, so this must only be
# called on the GPU thread between frames (used by canvases).
$gpuc proc replaceInTextureDescriptorSet {GpuTextureHandle oldHandle GpuTextureHandle newHandle} void {
    const GpuTextureBlock* replacement = &gpuTextures[newHandle];
    const VkDescriptorImageInfo sampled = {
        .sampler = replacement->textureSampler,
        .imageView = replacement->textureImageView,
        .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
    };
    const VkWriteDescriptorSet update = {
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .dstSet = *textureDescriptorSet_ptr(),
        .dstBinding = 0,
        .dstArrayElement = oldHandle,
        .descriptorCount = 1,
        .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
        .pImageInfo = &sampled,
    };
    vkUpdateDescriptorSets(device, 1, &update, 0, NULL);
}
# Request that texture gim be freed. Nothing is destroyed here. A
# DEFERRED_FREE entry is queued, and drainDeferredTextureOps later marks
# the texture as retiring.
$gpuc proc freeGpuTexture {GpuTextureHandle gim} void {
// Queue only. GPU resources are released later, after the grace period.
enqueueDeferredTextureOp(DEFERRED_FREE, gim);
}
# Retirement and destruction of textures. These helpers must only be
# called on the GPU thread between frames when the GPU is idle.
$gpuc code {
    // Mark texture gim as retiring and record the frame epoch from which
    // it may be destroyed. Slot 0, dead slots and slots that are already
    // retiring are left untouched.
    static void retireGpuTexture(GpuTextureHandle gim) {
        GpuTextureBlock* block = &gpuTextures[gim];
        if (gim == 0) return;
        if (!block->alive) return;
        if (block->retiring) return;

        block->retireAfterFrame = textureFrameEpoch + TEXTURE_RETIRE_GRACE_FRAMES;
        block->retiring = true;
    }

    // Release the GPU objects of texture gim and mark its slot as free.
    // Slot 0 and dead slots are left untouched.
    static void destroyGpuTextureResources(GpuTextureHandle gim) {
        GpuTextureBlock* block = &gpuTextures[gim];
        if (gim == 0) return;
        if (!block->alive) return;

        // Point this descriptor slot at the placeholder texture (slot 0)
        // so later frames don't reference a destroyed image.
        const VkDescriptorImageInfo placeholder = {
            .sampler = gpuTextures[0].textureSampler,
            .imageView = gpuTextures[0].textureImageView,
            .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        };
        const VkWriteDescriptorSet update = {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = *textureDescriptorSet_ptr(),
            .dstBinding = 0,
            .dstArrayElement = gim,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .pImageInfo = &placeholder,
        };
        vkUpdateDescriptorSets(device, 1, &update, 0, NULL);

#ifdef TRACY_ENABLE
        TracyCFree(block->textureImageAllocation);
#endif
        vkDestroySampler(device, block->textureSampler, NULL);
        vkDestroyImageView(device, block->textureImageView, NULL);
        vmaDestroyImage(vmaGetAllocator(), block->textureImage, block->textureImageAllocation);
        free(block->description);

        // Clear every field, and only then publish the slot as free.
        block->description = NULL;
        block->textureSampler = VK_NULL_HANDLE;
        block->textureImageView = VK_NULL_HANDLE;
        block->textureImageAllocation = NULL;
        block->textureImage = VK_NULL_HANDLE;
        block->retireAfterFrame = 0;
        block->retiring = false;
        block->alive = false;
    }

    // Destroy every retiring texture whose grace period has elapsed.
    // Slot 0 is never considered.
    static void destroyRetiredGpuTextures() {
        for (GpuTextureHandle gim = 1; gim < getMaxTextures(); gim++) {
            const GpuTextureBlock* candidate = &gpuTextures[gim];
            if (!candidate->alive) continue;
            if (!candidate->retiring) continue;
            if (candidate->retireAfterFrame > textureFrameEpoch) continue;
            destroyGpuTextureResources(gim);
        }
    }
}
# Apply all queued texture operations, then destroy retired textures
# whose grace period is over. Called on the GPU thread before recording
# work that may sample textures.
$gpuc proc drainDeferredTextureOps {} void {
    // Take a private snapshot of the queue and empty it, so that the
    // mutex is not held while the operations are applied.
    struct DeferredTextureEntry pending[DEFERRED_QUEUE_CAP];
    pthread_mutex_lock(&deferredQueueMutex);
    const int pendingCount = deferredQueueCount;
    memcpy(pending, deferredQueue, pendingCount * sizeof(struct DeferredTextureEntry));
    deferredQueueCount = 0;
    pthread_mutex_unlock(&deferredQueueMutex);

    // Apply in the order in which the operations were queued.
    for (const struct DeferredTextureEntry* entry = pending;
         entry < pending + pendingCount; entry++) {
        if (entry->op == DEFERRED_ADD) {
            writeTextureDescriptor(entry->handle);
        } else if (entry->op == DEFERRED_FREE) {
            retireGpuTexture(entry->handle);
        }
    }

    destroyRetiredGpuTextures();
}
# Called once per GPU frame so retired textures age exactly once,
# even though descriptor work may be drained multiple times.
$gpuc proc beginTextureFrame {} void {
// Advance the epoch first, so the drain below compares retirement
// deadlines against the new frame number.
textureFrameEpoch++;
drainDeferredTextureOps();
}
# Create the placeholder texture, a 128x128 opaque magenta image, and
# point every descriptor slot at it. The placeholder must end up in
# slot 0, which is what destroyed textures are redirected to.
$gpuc proc initPlaceholderTexture {} void {
    // Set up a placeholder texture in slot 0 that can always be drawn
    // that we can swap in when textures get invalidated.
    Image debugIm = {
        .width = 128, .height = 128,
        .components = 4,
        .bytesPerRow = 128 * 4,
        .data = malloc(128 * 128 * 4)
    };
    FOLK_ENSURE(debugIm.data != NULL);
    for (int y = 0; y < debugIm.height; y++) {
        for (int x = 0; x < debugIm.width; x++) {
            int i = y * debugIm.bytesPerRow + x * debugIm.components;
            debugIm.data[i+0] = 255;
            debugIm.data[i+1] = 0;
            debugIm.data[i+2] = 255;
            debugIm.data[i+3] = 255;
        }
    }

    GpuTextureHandle han = copyImageToGpuTexture(debugIm);
    // The pixels were copied into the staging buffer during the call
    // above, so the host-side copy is no longer needed. Release it before
    // the check below so it is freed on every path.
    free(debugIm.data);
    debugIm.data = NULL;
    FOLK_ENSURE(han == 0);

    // Fill all descriptor slots with the placeholder texture, then
    // drain the queued DEFERRED_ADD for slot 0 (which is now redundant
    // but harmless).
    initializeDescriptorSet(han);
    drainDeferredTextureOps();
}
# Compile the C unit assembled above and run its one-time initialization
# before the library is announced to other programs.
set gpuTextureLib [$gpuc compile]
$gpuTextureLib textureManagerInit
Claim the GPU texture library is $gpuTextureLib
# Serve wishes to load an image as a texture: upload the image, claim
# the resulting handle, and attach a destructor that calls freeGpuTexture
# with that handle.
# NOTE(review): presumably the destructor runs when the claim is
# retracted -- confirm against the Folk runtime.
When /someone/ wishes the GPU loads image /im/ as texture {
set gtex [$gpuTextureLib copyImageToGpuTexture $im]
Claim the GPU has loaded image $im as texture $gtex \
-destructor [list $gpuTextureLib freeGpuTexture $gtex]
}
}