builtin-programs/gpu/pipelines.folk

# pipelines.folk --
#
#     Shared render pass, pipeline creation, and shader compilation.
#     Created once (not per-display). Pipelines use dynamic viewport/scissor
#     so they work across displays of different sizes.

When the GPU Vulkan handle type definer is /defineVulkanHandleType/ &\
     the GPU library is /gpuLib/ &\
     the image library is /imageLib/ &\
     the GPU texture library is /gpuTextureLib/ {

fn defineVulkanHandleType

set cc [C]
$cc cflags -I./vendor
$cc include <pthread.h>
$cc code {
    #define VOLK_IMPLEMENTATION
    #include "volk/volk.h"
}

$cc extend $gpuLib
$cc extend $imageLib
$cc extend -noprocs $gpuTextureLib

local proc vktry {call} { string map {\n " "} [csubst {{
    VkResult res = $call;
    if (res != VK_SUCCESS) {
        fprintf(stderr, "Failed $call: %s (%d)\n",
                VkResultToString(res), res); exit(1);
    }
}}] }

$cc code {
    VkDevice device;
}

$cc define {
    VkRenderPass renderPass;
}

# Shader compilation:
defineVulkanHandleType $cc VkShaderModule
$cc proc createShaderModule {Jim_Obj* codeObj} VkShaderModule {
    int codeObjc = Jim_ListLength(interp, codeObj);
    uint32_t* code = malloc(codeObjc * sizeof(uint32_t));
    for (int i = 0; i < codeObjc; i++) {
        Jim_Obj* codeObjv = Jim_ListGetIndex(interp, codeObj, i);
        long val;
        Jim_GetLong(interp, codeObjv, &val);
        code[i] = (uint32_t)val;
    }

    VkShaderModuleCreateInfo createInfo = {0};
    createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    createInfo.codeSize = codeObjc * sizeof(uint32_t);
    createInfo.pCode = code;

    VkShaderModule shaderModule;
    $[vktry {vkCreateShaderModule(device, &createInfo, NULL, &shaderModule)}]
    free(code);
    return shaderModule;
}

# Pipeline creation:
defineVulkanHandleType $cc VkPipeline
defineVulkanHandleType $cc VkPipelineLayout
defineVulkanHandleType $cc VkDescriptorSet
$cc typedef uint64_t VkDeviceSize
$cc argtype VkDescriptorType { int $argname; __ENSURE_OK(Tcl_GetIntFromObj(interp, $obj, &$argname)); }
$cc rtype VkDescriptorType { $robj = Tcl_NewIntObj($rvalue); }

$cc code {
    typedef struct PushConstantsEncoder {
        int (*encode)(Jim_Interp* interp, Jim_Obj* obj, uint8_t out[128]);
    } PushConstantsEncoder;
} :extend ;# needs to be available for an extender in order for
           # Pipeline to be defined.
$cc struct Pipeline {
    VkPipeline pipeline;
    VkPipelineLayout pipelineLayout;

    size_t pushConstantsSize;
    PushConstantsEncoder* encodePushConstants;
}

$cc proc createPipeline {VkShaderModule vertShaderModule
                           VkShaderModule fragShaderModule
                           PushConstantsEncoder* encodePushConstants
                           size_t pushConstantsSize} Pipeline {
        VkPipelineShaderStageCreateInfo shaderStages[2]; {
        VkPipelineShaderStageCreateInfo vertShaderStageInfo = {0};
        vertShaderStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
        vertShaderStageInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
        vertShaderStageInfo.module = vertShaderModule;
        vertShaderStageInfo.pName = "main";

        VkPipelineShaderStageCreateInfo fragShaderStageInfo = {0};
        fragShaderStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
        fragShaderStageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
        fragShaderStageInfo.module = fragShaderModule;
        fragShaderStageInfo.pName = "main";

        shaderStages[0] = vertShaderStageInfo;
        shaderStages[1] = fragShaderStageInfo;
    }

    VkPipelineVertexInputStateCreateInfo vertexInputInfo = {0}; {
        vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
        vertexInputInfo.vertexBindingDescriptionCount = 0;
        vertexInputInfo.vertexAttributeDescriptionCount = 0;
    }

    VkPipelineInputAssemblyStateCreateInfo inputAssembly = {0}; {
        inputAssembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
        inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
        inputAssembly.primitiveRestartEnable = VK_FALSE;
    }

    // Dummy viewport/scissor — overridden by dynamic state at draw time.
    VkViewport viewport = {0}; {
        viewport.width = 1.0f;
        viewport.height = 1.0f;
        viewport.maxDepth = 1.0f;
    }
    VkRect2D scissor = {0}; {
        scissor.extent = (VkExtent2D) {1, 1};
    }
    VkPipelineViewportStateCreateInfo viewportState = {0};
    viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
    viewportState.viewportCount = 1;
    viewportState.pViewports = &viewport;
    viewportState.scissorCount = 1;
    viewportState.pScissors = &scissor;

    VkPipelineRasterizationStateCreateInfo rasterizer = {0};
    rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
    rasterizer.depthClampEnable = VK_FALSE;
    rasterizer.rasterizerDiscardEnable = VK_FALSE;
    rasterizer.polygonMode = VK_POLYGON_MODE_FILL;
    rasterizer.lineWidth = 1.0f;
    rasterizer.cullMode = VK_CULL_MODE_BACK_BIT;
    rasterizer.frontFace = VK_FRONT_FACE_CLOCKWISE;
    rasterizer.depthBiasEnable = VK_FALSE;

    VkPipelineMultisampleStateCreateInfo multisampling = {0};
    multisampling.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
    multisampling.sampleShadingEnable = VK_FALSE;
    multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;

    VkPipelineColorBlendAttachmentState colorBlendAttachment = {0};
    colorBlendAttachment.colorWriteMask =
      VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
      VK_COLOR_COMPONENT_A_BIT;
    colorBlendAttachment.blendEnable = VK_TRUE;
    colorBlendAttachment.srcColorBlendFactor = VK_BLEND_FACTOR_ONE;
    colorBlendAttachment.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
    colorBlendAttachment.colorBlendOp = VK_BLEND_OP_ADD;
    colorBlendAttachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE;
    colorBlendAttachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
    colorBlendAttachment.alphaBlendOp = VK_BLEND_OP_ADD;

    VkPipelineColorBlendStateCreateInfo colorBlending = {0};
    colorBlending.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
    colorBlending.logicOpEnable = VK_FALSE;
    colorBlending.logicOp = VK_LOGIC_OP_COPY;
    colorBlending.attachmentCount = 1;
    colorBlending.pAttachments = &colorBlendAttachment;

    VkPipelineDynamicStateCreateInfo dynamicState = {0};
    dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
    dynamicState.dynamicStateCount = 2;
    VkDynamicState dynamicStates[] = {
        VK_DYNAMIC_STATE_VIEWPORT,
        VK_DYNAMIC_STATE_SCISSOR
    };
    dynamicState.pDynamicStates = dynamicStates;

    VkPipelineLayout pipelineLayout; {
        VkPipelineLayoutCreateInfo pipelineLayoutInfo = {0};
        pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;

        pipelineLayoutInfo.pSetLayouts = textureDescriptorSetLayout_ptr();
        pipelineLayoutInfo.setLayoutCount = 1;

        // We configure all pipelines with push constants size =
        // 128 (the maximum), no matter what actual push constants
        // they take; this is so that pipelines are all
        // layout-compatible so we can reuse descriptor set
        // between pipelines without needing to rebind it.
        VkPushConstantRange pushConstantRange = {0};
        pushConstantRange.offset = 0;
        pushConstantRange.size = 128;
        pushConstantRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;

        pipelineLayoutInfo.pPushConstantRanges = &pushConstantRange;
        pipelineLayoutInfo.pushConstantRangeCount = 1;

        $[vktry {vkCreatePipelineLayout(device, &pipelineLayoutInfo, NULL, &pipelineLayout)}]
    }

    VkPipeline pipeline; {
        VkGraphicsPipelineCreateInfo pipelineInfo = {0};
        pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
        pipelineInfo.stageCount = 2;
        pipelineInfo.pStages = shaderStages;
        pipelineInfo.pVertexInputState = &vertexInputInfo;
        pipelineInfo.pInputAssemblyState = &inputAssembly;
        pipelineInfo.pViewportState = &viewportState;
        pipelineInfo.pRasterizationState = &rasterizer;
        pipelineInfo.pMultisampleState = &multisampling;
        pipelineInfo.pDepthStencilState = NULL;
        pipelineInfo.pColorBlendState = &colorBlending;
        pipelineInfo.pDynamicState = &dynamicState;

        pipelineInfo.layout = pipelineLayout;

        pipelineInfo.renderPass = renderPass;
        pipelineInfo.subpass = 0;

        pipelineInfo.basePipelineHandle = VK_NULL_HANDLE;
        pipelineInfo.basePipelineIndex = -1;

        $[vktry {vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, &pipeline)}]
    }

    return (Pipeline) {
        .pipeline = pipeline,
        .pipelineLayout = pipelineLayout,
        .pushConstantsSize = pushConstantsSize,
        .encodePushConstants = encodePushConstants
    };
}

# Bind the shared texture descriptor set on a command buffer.
$cc proc bindTextureDescriptorSet {VkCommandBuffer commandBuffer VkPipelineLayout pipelineLayout} void {
    vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                            pipelineLayout, 0, 1, textureDescriptorSet_ptr(), 0, NULL);
}
$cc proc getTextureDescriptorSet {} VkDescriptorSet {
    return *textureDescriptorSet_ptr();
}

$cc proc pipelinesInit {} void {
    $[vktry volkInitialize()]
    volkLoadInstanceOnly(*instance_ptr());

    device = *device_ptr();
    volkLoadDevice(device);

    // Set up VkRenderPass renderPass:
    {
        VkAttachmentDescription colorAttachment = {0};
        colorAttachment.format = VK_FORMAT_B8G8R8A8_UNORM;
        colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
        colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
        colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
        colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
        colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
        colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

        VkAttachmentReference colorAttachmentRef = {0};
        colorAttachmentRef.attachment = 0;
        colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

        VkSubpassDescription subpass = {0};
        subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
        subpass.colorAttachmentCount = 1;
        subpass.pColorAttachments = &colorAttachmentRef;

        VkRenderPassCreateInfo renderPassInfo = {0};
        renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
        renderPassInfo.attachmentCount = 1;
        renderPassInfo.pAttachments = &colorAttachment;
        renderPassInfo.subpassCount = 1;
        renderPassInfo.pSubpasses = &subpass;

        VkSubpassDependency dependency = {0};
        dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
        dependency.dstSubpass = 0;
        dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        dependency.srcAccessMask = 0;
        dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;

        renderPassInfo.dependencyCount = 1;
        renderPassInfo.pDependencies = &dependency;

        $[vktry {vkCreateRenderPass(device, &renderPassInfo, NULL, &renderPass)}]
    }
}

set pipelineLib [$cc compile]
$pipelineLib pipelinesInit
Claim the GPU pipeline library is $pipelineLib

# Pipeline compiler: Tcl-level library that takes GLSL fragments,
# routes them through glslc, builds a C subcompiler for push-constant
# encoding, and produces a Pipeline via createPipeline above.
set pipelineCompilerLib [library create pipelineCompiler {gpuLib pipelineLib} {
    variable gpuLib
    variable pipelineLib

    # Construct a reusable GLSL function that can be linked into and
    # called from a shader/pipeline.
    proc fn {fnDict arguments rtype body} {
        set fnArgs [list]
        # We inline all dependent functions from the caller scope
        # immediately here, since we don't know if those dependencies
        # would be accessible/in scope at all when this function gets
        # actually compiled into a shader.
        set depFnDict [dict create]
        foreach {argtype argname} $arguments {
            if {$argtype eq "fn"} {
                # TODO: Support fn being a list {fnName fn}.
                if {![dict exists $fnDict $argname]} {
                    puts stderr "pipelineCompiler::fn: $argname not found"
                    return -code 99 $argname
                }
                dict set depFnDict [string map {: ""} $argname] \
                    [dict get $fnDict $argname]
            } else {
                lappend fnArgs $argtype $argname
            }
        }
        return [list $fnArgs $depFnDict $rtype $body]
    }

    # Construct a shader pipeline that can be used to draw to the
    # screen.
    proc pipeline {fnDict args} {
        variable gpuLib
        variable pipelineLib

        if {[llength $args] == 3} {
            lassign $args vertArgs vertBody fragBody
            set fragArgs [list]
        } elseif {[llength $args] == 4} {
            lassign $args vertArgs vertBody fragArgs fragBody
        } else {
            error {pipelineCompiler pipeline: should be used as [$pipelineCompiler pipeline vertArgs vertBody fragBody], or [$pipelineCompiler pipeline vertArgs vertBody fragArgs fragBody]}
        }
        set vertFnDict [dict create]
        set fragFnDict [dict create]
        set pushConstants [list]
        foreach {argtype argname} $vertArgs {
            if {$argtype eq "fn"} {
                # TODO: Support fn being a list {name fn}.
                if {![dict exists $fnDict $argname]} {
                    return -code 99 $argname
                }
                set fn [dict get $fnDict $argname]
                set vertFnDict [dict merge $vertFnDict [lindex $fn 1]]
                dict set vertFnDict $argname $fn
                continue
            }
            lappend pushConstants $argtype $argname
        }
        foreach {argtype argname} $fragArgs {
            if {$argtype eq "fn"} {
                # TODO: Support fn being a list {name fn}.
                if {![dict exists $fnDict $argname]} {
                    return -code 99 $argname
                }
                set fn [dict get $fnDict $argname]
                set fragFnDict [dict merge $fragFnDict [lindex $fn 1]]
                dict set fragFnDict $argname $fn
                continue
            } else {
                error "Fragment arguments not supported"
            }
        }

        # Create a C subcompiler to create a fast routine to encode
        # the push constants on each draw call.
        set cc [C]
        $cc typedef int sampler2D
        $cc struct vec2 { float x; float y; }
        $cc struct vec3 { float x; float y; float z; }
        $cc struct vec4 { float x; float y; float z; float w; }
        $cc struct uvec4 { uint32_t x; uint32_t y; uint32_t z; uint32_t w; }
        # Note that mat3 is COLUMN-MAJOR and every column has 1 float
        # of padding at the end.
        $cc struct mat3 { float data[12]; }

        $cc argtype vec2 {
            vec2 $argname;
            {
                int $[set argname]_objc = Jim_ListLength(interp, $obj);
                __ENSURE($[set argname]_objc == 2);
                double x; double y;
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 0), &x));
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 1), &y));
                $argname = (vec2) { (float)x, (float)y };
            }
        }
        $cc argtype vec3 {
            vec3 $argname;
            {
                int $[set argname]_objc = Jim_ListLength(interp, $obj);
                __ENSURE($[set argname]_objc == 3);
                double x; double y; double z;
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 0), &x));
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 1), &y));
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 2), &z));
                $argname = (vec3) { (float)x, (float)y, (float)z };
            }
        }
        $cc argtype vec4 {
            vec4 $argname;
            {
                int $[set argname]_objc = Jim_ListLength(interp, $obj);
                __ENSURE($[set argname]_objc == 4);
                double x; double y; double z; double w;
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 0), &x));
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 1), &y));
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 2), &z));
                __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, $obj, 3), &w));
                $argname = (vec4) { (float)x, (float)y, (float)z, (float)w };
            }
        }
        $cc argtype uvec4 {
            uvec4 $argname;
            {
                int $[set argname]_objc = Jim_ListLength(interp, $obj);
                __ENSURE($[set argname]_objc == 4);
                jim_wide x; jim_wide y; jim_wide z; jim_wide w;
                __ENSURE_OK(Jim_GetWide(interp, Jim_ListGetIndex(interp, $obj, 0), &x));
                __ENSURE_OK(Jim_GetWide(interp, Jim_ListGetIndex(interp, $obj, 1), &y));
                __ENSURE_OK(Jim_GetWide(interp, Jim_ListGetIndex(interp, $obj, 2), &z));
                __ENSURE_OK(Jim_GetWide(interp, Jim_ListGetIndex(interp, $obj, 3), &w));
                $argname = (uvec4) { (uint32_t)x, (uint32_t)y, (uint32_t)z, (uint32_t)w };
            }
        }
        # Note that we take matrices from Tcl in ROW-MAJOR form and
        # convert them to column-major form inline here.
        $cc argtype mat3 {
            mat3 $argname;
            {
                int $[set argname]_objc = Jim_ListLength(interp, $obj);
                __ENSURE($[set argname]_objc == 3);
                for (int y = 0; y < 3; y++) {
                    Jim_Obj* rowObj = Jim_ListGetIndex(interp, $obj, y);
                    __ENSURE(Jim_ListLength(interp, rowObj) == 3);
                    for (int x = 0; x < 3; x++) {
                        double el;
                        __ENSURE_OK(Jim_GetDouble(interp, Jim_ListGetIndex(interp, rowObj, x), &el));

                        int i = x * 4 + y;
                        $argname.data[i] = el;
                    }
                }
            }
        }
        $cc code [csubst {
            typedef struct Args {
                $[join [lmap {argtype argname} $pushConstants {
                    set alignas [expr {$argtype eq "mat3" ? "16" : "sizeof($argtype)"}]
                    subst {_Alignas($alignas) $argtype $argname;}
                }] "\n"]
            } Args;

            typedef struct PushConstantsEncoder {
                int (*encode)(Jim_Interp* interp, Jim_Obj* obj, uint8_t out[128]);
            } PushConstantsEncoder;

            static uint8_t argsBuf[128];
        }]
        $cc include <stddef.h>
        $cc proc getArgsSize {} int { return sizeof(Args); }
        $cc proc encodeArgs $pushConstants void {
            Args args = {$[join [lmap {argtype argname} $pushConstants { subst {.$argname = $argname} }] " ,"]};
            memcpy(argsBuf, &args, sizeof(args));
        }
        # This is what gets saved as the PushConstantsEncoder and
        # called at draw time.
        $cc proc encodeObj {Jim_Interp* interp Jim_Obj* obj uint8_t* out} int {
            int objc = Jim_ListLength(interp, obj);
            Jim_Obj* objv[1 + objc];
            for (int i = 0; i < objc; i++) {
                objv[1 + i] = Jim_ListGetIndex(interp, obj, i);
            }

            int ret = encodeArgs_Cmd(interp, 1 + objc, objv);
            if (ret != JIM_OK) {
                // You CANNOT use FOLK_ENSURE here, because this is
                // passed as function pointer and does not capture the
                // correct jmp_buf for the caller.
                return -1;
            }

            memcpy(out, argsBuf, sizeof(Args));
            return sizeof(Args);
        }
        $cc proc makeEncoder {} PushConstantsEncoder* {
            PushConstantsEncoder* encoder = malloc(sizeof(PushConstantsEncoder));
            encoder->encode = encodeObj;
            return encoder;
        }
        set encoderLib [$cc compile]

        set encodePushConstants [$encoderLib makeEncoder]
        set pushConstantsSize [$encoderLib getArgsSize]

        set pushConstantsCode [if {[llength $pushConstants] > 0} {
            subst {
                layout(push_constant) uniform Args {
                    [join [lmap {argtype argname} $pushConstants {
                        if {$argname eq "_"} continue
                        if {$argtype eq "sampler2D"} {
                            expr {"int $argname;"}
                        } else {
                            expr {"$argtype $argname;"}
                        }
                    }] "\n"]
                } args;
            }
        }]

        set vertShaderModule [$pipelineLib createShaderModule [glslc -fshader-stage=vert [csubst {
            #version 450

            $pushConstantsCode

            $[join [lmap {fnName fn} $vertFnDict {
                lassign $fn fnArgs _ fnRtype fnBody
                subst {
                    $fnRtype $fnName ([join [lmap {fnArgtype fnArgname} $fnArgs {subst {$fnArgtype $fnArgname}}] ", "]) {
                        $fnBody
                    }
                }
            }] "\n"]

            vec4 vert() {
                $[join [lmap {argtype argname} $pushConstants {
                    if {$argname eq "_"} continue
                    if {$argtype eq "sampler2D"} continue
                    expr {"$argtype $argname = args.$argname;"}
                }] " "]
                $vertBody
            }

            void main() {
                gl_Position = vert();
            }
        }]]]
        # We pass the descriptor set with all textures (samplers) to all
        # fragment shaders, so we never need to rebind it (at draw
        # time, the shader may get an index into the array if it's
        # meant to draw an texture).
        #
        # Note that we have individual combined image+samplers,
        # instead of 1 global sampler and multiple images/textures,
        # because that's the only way to allow each texture to have
        # its own dimensions (dimensions are a property bound to the
        # sampler).
        #
        # We have a whole code path basically just to handle v3dv
        # (Raspberry Pi GPU), which doesn't support dynamic indexing
        # (based on push constant) into the descriptor array. On GPUs
        # like that, we manually emit an if ladder that checks each
        # possible value of the push constant and uses the right
        # statically-indexed descriptor.
        set gpuSupportsDynamicIndexing [$gpuLib getDoesSupportShaderSampledImageArrayDynamicIndexing]
        set fragShaderModule [$pipelineLib createShaderModule [glslc -fshader-stage=frag [csubst {
            #version 450

            layout(set = 0, binding = 0) uniform sampler2D _samplers[$[$gpuLib getMaxTextures]];

            $pushConstantsCode

            layout(location = 0) out vec4 outColor;

            $[join [lmap {fnName fn} $fragFnDict {
                lassign $fn fnArgs _ fnRtype fnBody
                subst {
                    $fnRtype $fnName ([join [lmap {fnArgtype fnArgname} $fnArgs {subst {$fnArgtype $fnArgname}}] ", "]) {
                        $fnBody
                    }
                }
            }] "\n"]

            vec4 frag($[join [lmap {argtype argname} $pushConstants {
                if {$argname eq "_"} continue
                expr {"$argtype $argname"}
            }] ", "]) {
                $fragBody
            }

            $[eval {
                set samplerIdxs [lsearch -all -exact $pushConstants sampler2D]
                proc emitFragInvocation {gpuSupportsDynamicIndexing pushConstants} { subst {
                    vec4 rawColor = frag([join [lmap {argtype argname} $pushConstants {
                        if {$argname eq "_"} continue
                        if {$argtype eq "sampler2D"} {
                            if {$gpuSupportsDynamicIndexing} {
                                expr {"_samplers\[args.$argname\]"}
                            } else {
                                # This should have been patched to a
                                # static expression like
                                # `_samplers[3]` by the caller.
                                expr {"$argname"}
                            }
                        } else {
                            expr {"args.$argname"}
                        }
                    }] ", "]);

                    // Premultiply the RGB
                    outColor = vec4(rawColor.rgb * rawColor.a, rawColor.a);
                }}
                list
            }]
            void main() {
                $[if {$gpuSupportsDynamicIndexing || [llength $samplerIdxs] == 0} {
                    emitFragInvocation $gpuSupportsDynamicIndexing $pushConstants

                } elseif {[llength $samplerIdxs] == 1} {
                    set samplerIdx [+ [lindex $samplerIdxs 0] 1]
                    set samplerName [lindex $pushConstants $samplerIdx]
                    set maxTextures [$gpuLib getMaxTextures]
                    set xs [list]
                    for {set i 0} {$i < $maxTextures} {incr i} {
                        set patchedPushConstants [lreplace $pushConstants $samplerIdx $samplerIdx \
                                                      _samplers\[$i\]]
                        lappend xs [subst {
                            [expr {$i == 0 ? "if" : "else if"}] (args.$samplerName == $i) {
                                [emitFragInvocation $gpuSupportsDynamicIndexing $patchedPushConstants]
                            }
                        }]
                    }
                    join $xs "\n"
                } else {
                    error "display: Cannot currently compile a shader that has more than 1 sampler2D parameter on this GPU."
                }]
            }
        }]]]

        # pipeline needs to contain a specification of push constants,
        # so they can be filled in at draw time.
        set pipeline [$pipelineLib createPipeline \
                          $vertShaderModule $fragShaderModule \
                          $encodePushConstants \
                          $pushConstantsSize]
        return $pipeline
    }

    proc glslc {args} {
        set cmdargs [lreplace $args end end]
        set glsl [lindex $args end]
        set glslfile [file tempfile /tmp/glslfileXXXXXX].glsl
        set glslfd [open $glslfile w]; puts $glslfd $glsl; close $glslfd
        split [string map {\n ""} [exec glslc {*}$cmdargs -mfmt=num -o - $glslfile]] ","
    }
}]
Claim the GPU pipeline compiler library is $pipelineCompilerLib

}