view static_utils.nim @ 1183:850450bfe2a2 compiletime-tests

sync from bedroom to office
author sam <sam@basx.dev>
date Thu, 04 Jul 2024 07:30:08 +0700
parents e9a212e9cdf7
children 3f43c7163029
line wrap: on
line source

import std/os
import std/enumerate
import std/hashes
import std/macros
import std/strformat
import std/strutils
import std/sequtils
import std/typetraits as tt

import semicongine/core/utils
import semicongine/core/vector
import semicongine/core/matrix
import semicongine/core/vulkanapi

# Marker pragmas for the fields of a shader description object. They carry no
# data; the code in this file only tests for their presence with
# `hasCustomPragma`.
# Field becomes a vertex shader `in` declaration (see generateShaderSource).
template VertexAttribute {.pragma.}
# Same as VertexAttribute for shader generation; ForVertexDataFields reports
# it through its `isinstancename` constant.
template InstanceAttribute {.pragma.}
# Field is declared as vertex shader `out` and fragment shader `in`.
template Pass {.pragma.}
# Like Pass, but the generated declarations are prefixed with `flat`.
template PassFlat {.pragma.}
# Field becomes a fragment shader `out` declaration.
template ShaderOutput {.pragma.}
# Not referenced in the visible part of this file.
template VertexIndices {.pragma.}

# Length of the VkDescriptorSet handle array kept by every DescriptorSet
# (presumably one handle per frame in flight - confirm against the render loop).
const INFLIGHTFRAMES = 2'u32
const MEMORY_ALIGNMENT = 65536'u64 # Align buffers inside memory along this alignment
const BUFFER_ALIGNMENT = 64'u64 # align offsets inside buffers along this alignment

# some globals that will (likely?) never change during the life time of the engine
type
  # Every scalar, vector and matrix type that VkType and GlslType know how to translate.
  SupportedGPUType = float32 | float64 | int8 | int16 | int32 | int64 | uint8 | uint16 | uint32 | uint64 | TVec2[int32] | TVec2[int64] | TVec3[int32] | TVec3[int64] | TVec4[int32] | TVec4[int64] | TVec2[uint32] | TVec2[uint64] | TVec3[uint32] | TVec3[uint64] | TVec4[uint32] | TVec4[uint64] | TVec2[float32] | TVec2[float64] | TVec3[float32] | TVec3[float64] | TVec4[float32] | TVec4[float64] | TMat2[float32] | TMat2[float64] | TMat23[float32] | TMat23[float64] | TMat32[float32] | TMat32[float64] | TMat3[float32] | TMat3[float64] | TMat34[float32] | TMat34[float64] | TMat43[float32] | TMat43[float64] | TMat4[float32] | TMat4[float64]

  # The two shader modules created by CompileShader for the shader type TShader.
  ShaderObject[TShader] = object
    vertexShader: VkShaderModule
    fragmentShader: VkShaderModule

  # Vulkan handles shared by all procs in this file through the global `vulkan`.
  VulkanGlobals = object
    instance: VkInstance
    device: VkDevice
    physicalDevice: VkPhysicalDevice
    queueFamilyIndex: uint32
    queue: VkQueue

  # Width of index data; converted to VkIndexType by toVkIndexType.
  IndexType = enum
    None, UInt8, UInt16, UInt32

  # Memory without a CPU-side pointer; UpdateGPUBuffer uploads to it through a
  # temporary staging buffer.
  IndirectGPUMemory = object
    vk: VkDeviceMemory
    size: uint64
    needsTransfer: bool # usually true
  # Memory with a CPU-side pointer (`data`); UpdateGPUBuffer writes to it with copyMem.
  DirectGPUMemory = object
    vk: VkDeviceMemory
    size: uint64
    data: pointer
    needsFlush: bool # usually true
  GPUMemory = IndirectGPUMemory | DirectGPUMemory

  # A Vulkan buffer together with the memory it lives in.
  Buffer[TMemory: GPUMemory] = object
    memory: TMemory
    vk: VkBuffer
    offset: uint64 # added to memory.data in UpdateGPUBuffer, i.e. position inside `memory`
    size: uint64
  # Image, view and sampler plus the CPU-side pixel data (Channels bytes per pixel).
  Texture[Channels: static int, TMemory: GPUMemory] = object
    memory: TMemory
    vk: VkImage
    imageview: VkImageView
    sampler: VkSampler
    offset: uint64
    size: uint64
    width: int
    data: seq[array[Channels, uint8]]

  # CPU-side sequence of values and the buffer location it is uploaded to.
  GPUArray[T: SupportedGPUType, TMemory: GPUMemory] = object
    data: seq[T]
    buffer: Buffer[TMemory]
    offset: uint64 # added on top of buffer.offset in UpdateGPUBuffer, i.e. position inside `buffer`
  # CPU-side single object/array and the buffer location it is uploaded to.
  GPUValue[T: object|array, TMemory: GPUMemory] = object
    data: T
    buffer: Buffer[TMemory]
    offset: uint64 # added on top of buffer.offset in UpdateGPUBuffer, i.e. position inside `buffer`
  GPUData = GPUArray | GPUValue

  DescriptorSetType = enum
    GlobalSet
    MaterialSet
  # User data object plus one VkDescriptorSet handle per INFLIGHTFRAMES entry.
  DescriptorSet[T: object, sType: static DescriptorSetType] = object
    data: T
    vk: array[INFLIGHTFRAMES.int, VkDescriptorSet]

  # Pipeline handle, its layout and one descriptor set layout per DescriptorSetType.
  Pipeline[TShader] = object
    vk: VkPipeline
    layout: VkPipelineLayout
    descriptorSetLayouts: array[DescriptorSetType, VkDescriptorSetLayout]
  BufferType = enum
    VertexBuffer, IndexBuffer, UniformBuffer
  # Bookkeeping of all memory blocks and buffers, split by memory kind.
  RenderData = object
    descriptorPool: VkDescriptorPool
    # tuple is memory and offset to next free allocation in that memory
    indirectMemory: seq[tuple[memory: IndirectGPUMemory, usedOffset: uint64]]
    directMemory: seq[tuple[memory: DirectGPUMemory, usedOffset: uint64]]
    indirectBuffers: seq[tuple[buffer: Buffer[IndirectGPUMemory], btype: BufferType, usedOffset: uint64]]
    directBuffers: seq[tuple[buffer: Buffer[DirectGPUMemory], btype: BufferType, usedOffset: uint64]]

# Module-wide Vulkan handles read by the procs below. Nothing in the visible
# part of this file assigns its fields - presumably filled during engine
# start-up; confirm against the initialisation code.
var vulkan: VulkanGlobals

func alignedTo[T: SomeInteger](value: T, alignment: T): T =
  ## Rounds `value` up to the next multiple of `alignment`.
  ## A value that already is a multiple is returned unchanged.
  let overshoot = value mod alignment
  if overshoot != 0:
    value + alignment - overshoot
  else:
    value

# Maps the static type of `value` to a VkFormat. The choice is made entirely
# at compile time from T; the run-time content of `value` is never read.
# Branches are tested top to bottom and the first match wins.
# Matrix types yield a vector format (e.g. TMat4[float32] gives the same
# format as TVec4[float32]) - presumably the format of one matrix row/column,
# with NumberOfVertexInputAttributeDescriptors giving how many of them are
# needed; confirm against the vertex input setup.
# A type outside the list stops compilation with "Unsupported data type on GPU".
func VkType[T: SupportedGPUType](value: T): VkFormat =
  when T is float32: VK_FORMAT_R32_SFLOAT
  elif T is float64: VK_FORMAT_R64_SFLOAT
  elif T is int8: VK_FORMAT_R8_SINT
  elif T is int16: VK_FORMAT_R16_SINT
  elif T is int32: VK_FORMAT_R32_SINT
  elif T is int64: VK_FORMAT_R64_SINT
  elif T is uint8: VK_FORMAT_R8_UINT
  elif T is uint16: VK_FORMAT_R16_UINT
  elif T is uint32: VK_FORMAT_R32_UINT
  elif T is uint64: VK_FORMAT_R64_UINT
  elif T is TVec2[int32]: VK_FORMAT_R32G32_SINT
  elif T is TVec2[int64]: VK_FORMAT_R64G64_SINT
  elif T is TVec3[int32]: VK_FORMAT_R32G32B32_SINT
  elif T is TVec3[int64]: VK_FORMAT_R64G64B64_SINT
  elif T is TVec4[int32]: VK_FORMAT_R32G32B32A32_SINT
  elif T is TVec4[int64]: VK_FORMAT_R64G64B64A64_SINT
  elif T is TVec2[uint32]: VK_FORMAT_R32G32_UINT
  elif T is TVec2[uint64]: VK_FORMAT_R64G64_UINT
  elif T is TVec3[uint32]: VK_FORMAT_R32G32B32_UINT
  elif T is TVec3[uint64]: VK_FORMAT_R64G64B64_UINT
  elif T is TVec4[uint32]: VK_FORMAT_R32G32B32A32_UINT
  elif T is TVec4[uint64]: VK_FORMAT_R64G64B64A64_UINT
  elif T is TVec2[float32]: VK_FORMAT_R32G32_SFLOAT
  elif T is TVec2[float64]: VK_FORMAT_R64G64_SFLOAT
  elif T is TVec3[float32]: VK_FORMAT_R32G32B32_SFLOAT
  elif T is TVec3[float64]: VK_FORMAT_R64G64B64_SFLOAT
  elif T is TVec4[float32]: VK_FORMAT_R32G32B32A32_SFLOAT
  elif T is TVec4[float64]: VK_FORMAT_R64G64B64A64_SFLOAT
  elif T is TMat2[float32]: VK_FORMAT_R32G32_SFLOAT
  elif T is TMat2[float64]: VK_FORMAT_R64G64_SFLOAT
  elif T is TMat23[float32]: VK_FORMAT_R32G32B32_SFLOAT
  elif T is TMat23[float64]: VK_FORMAT_R64G64B64_SFLOAT
  elif T is TMat32[float32]: VK_FORMAT_R32G32_SFLOAT
  elif T is TMat32[float64]: VK_FORMAT_R64G64_SFLOAT
  elif T is TMat3[float32]: VK_FORMAT_R32G32B32_SFLOAT
  elif T is TMat3[float64]: VK_FORMAT_R64G64B64_SFLOAT
  elif T is TMat34[float32]: VK_FORMAT_R32G32B32A32_SFLOAT
  elif T is TMat34[float64]: VK_FORMAT_R64G64B64A64_SFLOAT
  elif T is TMat43[float32]: VK_FORMAT_R32G32B32_SFLOAT
  elif T is TMat43[float64]: VK_FORMAT_R64G64B64_SFLOAT
  elif T is TMat4[float32]: VK_FORMAT_R32G32B32A32_SFLOAT
  elif T is TMat4[float64]: VK_FORMAT_R64G64B64A64_SFLOAT
  else: {.error: "Unsupported data type on GPU".}

# Returns the type name that generateShaderSource writes into GLSL
# declarations for a field of type T. Like VkType, the answer depends only on
# the static type; `value` itself is not inspected.
# All signed integer widths map to "int", all unsigned widths to "uint", and
# 64-bit integer vectors map to the same names as their 32-bit counterparts.
# NOTE(review): GLSL spells non-square matrix types matCxR ("mat2x3",
# "dmat4x3", ...); the names "mat23", "mat32", "mat34", "mat43" and their
# "d" variants returned below are not GLSL type names - confirm whether these
# branches are ever used and which of columns/rows the TMatNM digits denote.
# A type outside the list stops compilation with "Unsupported data type on GPU".
func GlslType[T: SupportedGPUType|Texture](value: T): string =
  when T is float32: "float"
  elif T is float64: "double"
  elif T is int8 or T is int16 or T is int32 or T is int64: "int"
  elif T is uint8 or T is uint16 or T is uint32 or T is uint64: "uint"
  elif T is TVec2[int32]: "ivec2"
  elif T is TVec2[int64]: "ivec2"
  elif T is TVec3[int32]: "ivec3"
  elif T is TVec3[int64]: "ivec3"
  elif T is TVec4[int32]: "ivec4"
  elif T is TVec4[int64]: "ivec4"
  elif T is TVec2[uint32]: "uvec2"
  elif T is TVec2[uint64]: "uvec2"
  elif T is TVec3[uint32]: "uvec3"
  elif T is TVec3[uint64]: "uvec3"
  elif T is TVec4[uint32]: "uvec4"
  elif T is TVec4[uint64]: "uvec4"
  elif T is TVec2[float32]: "vec2"
  elif T is TVec2[float64]: "dvec2"
  elif T is TVec3[float32]: "vec3"
  elif T is TVec3[float64]: "dvec3"
  elif T is TVec4[float32]: "vec4"
  elif T is TVec4[float64]: "dvec4"
  elif T is TMat2[float32]: "mat2"
  elif T is TMat2[float64]: "dmat2"
  elif T is TMat23[float32]: "mat23"
  elif T is TMat23[float64]: "dmat23"
  elif T is TMat32[float32]: "mat32"
  elif T is TMat32[float64]: "dmat32"
  elif T is TMat3[float32]: "mat3"
  elif T is TMat3[float64]: "dmat3"
  elif T is TMat34[float32]: "mat34"
  elif T is TMat34[float64]: "dmat34"
  elif T is TMat43[float32]: "mat43"
  elif T is TMat43[float64]: "dmat43"
  elif T is TMat4[float32]: "mat4"
  elif T is TMat4[float64]: "dmat4"
  elif T is Texture: "sampler2D"
  else: {.error: "Unsupported data type on GPU".}

# Expands `body` once for every field of `shader` that is tagged
# {.VertexAttribute.} or {.InstanceAttribute.}. Inside `body` three symbols
# are available under the names chosen by the caller:
#   fieldname      - const string, the name of the field
#   valuename      - let, a copy of the field value
#   isinstancename - const bool, true when the field has {.InstanceAttribute.}
# Fields without either pragma are skipped.
# NOTE(review): Nim parses `not a is b` as `(not a) is b`, so the two guards
# below may not test what their messages say; `isnot` was probably intended.
# Read literally, the messages also demand that a field is a seq and a
# SupportedGPUType at the same time (SupportedGPUType contains no seq) - the
# second check presumably targets the element type. Confirm before relying on
# either diagnostic.
template ForVertexDataFields(shader: typed, fieldname, valuename, isinstancename, body: untyped): untyped =
  for theFieldname, value in fieldPairs(shader):
    when hasCustomPragma(value, VertexAttribute) or hasCustomPragma(value, InstanceAttribute):
      when not typeof(value) is seq:
        {.error: "field '" & theFieldname & "' needs to be a seq".}
      when not typeof(value) is SupportedGPUType:
        {.error: "field '" & theFieldname & "' is not a supported GPU type".}
      # a fresh block per field keeps the injected symbols from clashing
      block:
        const `fieldname` {.inject.} = theFieldname
        let `valuename` {.inject.} = value
        const `isinstancename` {.inject.} = hasCustomPragma(value, InstanceAttribute)
        body

# Expands `body` once for every field of `shader` that can back a descriptor:
#   - a Texture                -> VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, count 1
#   - any other object         -> VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, count 1
#   - an array of Texture      -> combined image sampler, count = array length
#   - an array of other object -> uniform buffer, count = array length
# An array of anything else stops compilation; fields that are neither object
# nor array are skipped silently.
# Symbols visible inside `body` (names chosen by the caller):
#   fieldname     - const string, the field name
#   valuename     - let, a copy of the field value
#   typename      - const, the descriptor type
#   countname     - const uint32, the descriptor count
#   bindingNumber - var uint32, declared in the caller's scope, starting at 1
#                   and incremented after every expansion of `body`
# NOTE(review): generateShaderSource numbers the bindings of a descriptor set
# starting at 0, while this counter starts at 1 - confirm which one the
# descriptor set layout uses.
template ForDescriptorFields(shader: typed, fieldname, valuename, typename, countname, bindingNumber, body: untyped): untyped =
  var `bindingNumber` {.inject.} = 1'u32
  for theFieldname, value in fieldPairs(shader):
    # Texture has to be tested before the generic `object` branch
    when typeof(value) is Texture:
      block:
        const `fieldname` {.inject.} = theFieldname
        const `typename` {.inject.} = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
        const `countname` {.inject.} = 1'u32
        let `valuename` {.inject.} = value
        body
        `bindingNumber`.inc
    elif typeof(value) is object:
      block:
        const `fieldname` {.inject.} = theFieldname
        const `typename` {.inject.} = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
        const `countname` {.inject.} = 1'u32
        let `valuename` {.inject.} = value
        body
        `bindingNumber`.inc
    elif typeof(value) is array:
      when elementType(value) is Texture:
        block:
          const `fieldname` {.inject.} = theFieldname
          const `typename` {.inject.} = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
          const `countname` {.inject.} = uint32(typeof(value).len)
          let `valuename` {.inject.} = value
          body
          `bindingNumber`.inc
      elif elementType(value) is object:
        block:
          const `fieldname` {.inject.} = theFieldname
          const `typename` {.inject.} = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
          const `countname` {.inject.} = uint32(typeof(value).len)
          let `valuename` {.inject.} = value
          body
          `bindingNumber`.inc
      else:
        {.error: "Unsupported descriptor type: " & tt.name(typeof(value)).}

func NumberOfVertexInputAttributeDescriptors[T: SupportedGPUType|Texture](value: T): uint32 =
  ## Number of vertex input attribute descriptions needed for one value of
  ## type T: 2, 3 or 4 for the matrix types listed below, 1 for everything
  ## else. Only the static type is inspected, never the content of `value`.
  when T is TMat2[float32] or
    T is TMat2[float64] or
    T is TMat23[float32] or
    T is TMat23[float64]:
    result = 2
  elif T is TMat32[float32] or
    T is TMat32[float64] or
    T is TMat3[float32] or
    T is TMat3[float64] or
    T is TMat34[float32] or
    T is TMat34[float64]:
    result = 3
  elif T is TMat43[float32] or
    T is TMat43[float64] or
    T is TMat4[float32] or
    T is TMat4[float64]:
    result = 4
  else:
    result = 1

func NLocationSlots[T: SupportedGPUType|Texture](value: T): uint32 =
  ## Number of shader input locations occupied by one attribute of type T.
  ##
  ## One location:
  ##   - 16-bit scalar and vector types
  ##   - 32-bit scalar and vector types
  ##   - 64-bit scalar and two-component vector types
  ## Two locations:
  ##   - 64-bit three- and four-component vectors
  ##
  ## The 64-bit matrix types listed below are counted with two locations as
  ## well. Only the static type is inspected, never the content of `value`.
  const needsTwoSlots =
    T is TVec3[int64] or T is TVec4[int64] or
    T is TVec3[uint64] or T is TVec4[uint64] or
    T is TVec3[float64] or T is TVec4[float64] or
    T is TMat23[float64] or
    T is TMat3[float64] or
    T is TMat34[float64] or
    T is TMat43[float64] or
    T is TMat4[float64]
  when needsTwoSlots:
    2
  else:
    1

template sType(descriptorSet: DescriptorSet): untyped =
  ## Yields the generic parameter at index 1 (declared as `sType`) of the
  ## concrete DescriptorSet type of `descriptorSet`.
  # The type has to be taken from the template's own parameter; no symbol
  # called `gpuData` exists in this template.
  # NOTE(review): std/typetraits wraps static generic parameters in
  # `StaticParam[...]`; callers that need the enum value itself may have to
  # append `.value` - confirm against the call sites.
  get(genericParams(typeof(descriptorSet)), 1)

template UsesIndirectMemory(gpuData: GPUData): untyped =
  ## Compile-time check: is the memory type parameter (generic parameter at
  ## index 1) of `gpuData` IndirectGPUMemory?
  genericParams(typeof(gpuData)).get(1) is IndirectGPUMemory
template UsesDirectMemory(gpuData: GPUData): untyped =
  ## Compile-time check: is the memory type parameter (generic parameter at
  ## index 1) of `gpuData` DirectGPUMemory?
  genericParams(typeof(gpuData)).get(1) is DirectGPUMemory

template size(gpuArray: GPUArray): uint64 =
  ## Size in bytes of the CPU-side elements: element count times element size.
  uint64(gpuArray.data.len * sizeof(elementType(gpuArray.data)))
template size(gpuValue: GPUValue): uint64 =
  ## Size in bytes of the CPU-side value.
  uint64(sizeof(gpuValue.data))

template datapointer(gpuArray: GPUArray): pointer =
  ## Address of the first CPU-side element. Indexes element 0, so the
  ## sequence must not be empty.
  gpuArray.data[0].addr
template datapointer(gpuValue: GPUValue): pointer =
  ## Address of the CPU-side value.
  gpuValue.data.addr

proc AllocationSize(buffer: Buffer): uint64 =
  ## Asks Vulkan for the memory requirements of `buffer.vk` on the global
  ## device and returns their `size`.
  var requirements: VkMemoryRequirements
  vkGetBufferMemoryRequirements(vulkan.device, buffer.vk, addr(requirements))
  result = requirements.size

proc GetPhysicalDevice(instance: VkInstance): VkPhysicalDevice =
  ## Picks a physical device of `instance`.
  ##
  ## Discrete GPUs are tried first; integrated GPUs are only looked at when no
  ## discrete GPU was found. Within a device type the device with the largest
  ## `maxImageDimension2D` limit wins. Asserts that some device was selected.
  var deviceCount: uint32
  checkVkResult vkEnumeratePhysicalDevices(instance, addr(deviceCount), nil)
  var candidates = newSeq[VkPhysicalDevice](deviceCount)
  checkVkResult vkEnumeratePhysicalDevices(instance, addr(deviceCount), candidates.ToCPointer)

  var bestScore = 0'u32
  for wantedType in [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU, VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU]:
    # a hit in the preferred device type ends the search
    if bestScore > 0:
      break
    for candidate in candidates:
      var properties: VkPhysicalDeviceProperties
      vkGetPhysicalDeviceProperties(candidate, addr(properties))
      if properties.deviceType != wantedType:
        continue
      if properties.limits.maxImageDimension2D > bestScore:
        bestScore = properties.limits.maxImageDimension2D
        result = candidate

  assert bestScore > 0, "Unable to find integrated or discrete GPU"


proc GetDirectMemoryTypeIndex(): uint32 =
  ## Returns the index of the host-visible memory type of the global physical
  ## device whose heap is the largest. Asserts that such a type exists.
  const NOT_FOUND = high(uint32)
  var memoryProperties: VkPhysicalDeviceMemoryProperties
  vkGetPhysicalDeviceMemoryProperties(vulkan.physicalDevice, addr(memoryProperties))

  result = NOT_FOUND
  var largestHeapSize: uint64 = 0
  for typeIndex in 0'u32 ..< memoryProperties.memoryTypeCount:
    # only host-visible memory types qualify
    if VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT notin toEnums(memoryProperties.memoryTypes[typeIndex].propertyFlags):
      continue
    let heapSize = memoryProperties.memoryHeaps[memoryProperties.memoryTypes[typeIndex].heapIndex].size
    if heapSize > largestHeapSize:
      largestHeapSize = heapSize
      result = typeIndex
  assert result != NOT_FOUND, "There is not host visible memory. This is likely a driver bug."

proc GetQueueFamily(pDevice: VkPhysicalDevice, qType: VkQueueFlagBits): uint32 =
  ## Returns the index of the first queue family of `pDevice` whose flags
  ## contain `qType`.
  ##
  ## Fails with an AssertionDefect when no family matches. `doAssert` is used
  ## so the failure also fires when assertions are compiled out; otherwise the
  ## proc would fall through and hand back the default result 0, which looks
  ## like a valid family index.
  var nQueueFamilies: uint32
  # first call only queries the count, second call fills the list
  vkGetPhysicalDeviceQueueFamilyProperties(pDevice, addr nQueueFamilies, nil)
  var queueFamilies = newSeq[VkQueueFamilyProperties](nQueueFamilies)
  vkGetPhysicalDeviceQueueFamilyProperties(pDevice, addr nQueueFamilies, queueFamilies.ToCPointer)
  for i in 0'u32 ..< nQueueFamilies:
    if qType in toEnums(queueFamilies[i].queueFlags):
      return i
  doAssert false, &"Queue of type {qType} not found"

proc GetQueue(device: VkDevice, queueFamilyIndex: uint32, qType: VkQueueFlagBits): VkQueue =
  ## Fetches queue number 0 of family `queueFamilyIndex` from `device`.
  ## `qType` is not used by the body.
  vkGetDeviceQueue(device, queueFamilyIndex, 0, addr(result))

proc GetSurfaceFormat(): VkFormat =
  ## Returns the fixed surface format used by the engine; nothing is queried.
  # EVERY windows driver and almost every linux driver should support this
  result = VK_FORMAT_B8G8R8A8_SRGB

template WithSingleUseCommandBuffer(device: VkDevice, cmd, body: untyped): untyped =
  ## Records `body` into a freshly allocated primary command buffer, submits
  ## it to `vulkan.queue` and blocks until the GPU has finished executing it.
  ##
  ## `cmd` is the name under which the command buffer is visible in `body`.
  ## The command pool (and with it the command buffer) and the fence used for
  ## waiting are created per use and destroyed again before the template ends.
  block:
    var
      commandBufferPool: VkCommandPool
      createInfo = VkCommandPoolCreateInfo(
        sType: VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        flags: toBits [VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT],
        queueFamilyIndex: vulkan.queueFamilyIndex,
      )
    checkVkResult vkCreateCommandPool(device, addr createInfo, nil, addr(commandBufferPool))
    var
      `cmd` {.inject.}: VkCommandBuffer
      allocInfo = VkCommandBufferAllocateInfo(
        sType: VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        commandPool: commandBufferPool,
        level: VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        commandBufferCount: 1,
      )
    checkVkResult device.vkAllocateCommandBuffers(addr allocInfo, addr(`cmd`))
    var beginInfo = VkCommandBufferBeginInfo(
      sType: VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      flags: VkCommandBufferUsageFlags(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT),
    )
    checkVkResult `cmd`.vkBeginCommandBuffer(addr beginInfo)

    body

    checkVkResult `cmd`.vkEndCommandBuffer()
    var submitInfo = VkSubmitInfo(
      sType: VK_STRUCTURE_TYPE_SUBMIT_INFO,
      commandBufferCount: 1,
      pCommandBuffers: addr(`cmd`),
    )

    # the fence is created unsignaled and signaled by the submission below
    var
      fence: VkFence
      fenceInfo = VkFenceCreateInfo(
        sType: VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        # flags: toBits [VK_FENCE_CREATE_SIGNALED_BIT]
      )
    checkVkResult device.vkCreateFence(addr(fenceInfo), nil, addr(fence))
    checkVkResult vkQueueSubmit(vulkan.queue, 1, addr(submitInfo), fence)
    checkVkResult vkWaitForFences(device, 1, addr fence, false, high(uint64))
    # the wait above has returned, so the fence is no longer in use; without
    # this call every use of the template would leak one fence
    vkDestroyFence(device, fence, nil)
    vkDestroyCommandPool(device, commandBufferPool, nil)


proc UpdateGPUBuffer(gpuData: GPUData) =
  ## Copies the CPU-side content of `gpuData` to its place on the GPU.
  ##
  ## Nothing happens when `gpuData` holds zero bytes.
  ## Direct memory: the bytes are written with `copyMem` at
  ## `memory.data + buffer.offset + gpuData.offset`.
  ## Indirect memory: the bytes are put into a temporary staging buffer and
  ## copied from there into `gpuData.buffer.vk` at `gpuData.offset` with a
  ## single-use command buffer; the staging resources are released afterwards.
  if gpuData.size == 0:
    return
  when UsesDirectMemory(gpuData):
    copyMem(cast[pointer](cast[uint64](gpuData.buffer.memory.data) + gpuData.buffer.offset + gpuData.offset), gpuData.datapointer, gpuData.size)
  else:
    var
      stagingBuffer: VkBuffer
      createInfo = VkBufferCreateInfo(
        sType: VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        flags: VkBufferCreateFlags(0),
        size: gpuData.size,
        usage: toBits([VK_BUFFER_USAGE_TRANSFER_SRC_BIT]),
        sharingMode: VK_SHARING_MODE_EXCLUSIVE,
      )
    checkVkResult vkCreateBuffer(
      device = vulkan.device,
      pCreateInfo = addr(createInfo),
      pAllocator = nil,
      pBuffer = addr(stagingBuffer),
    )
    # Size the allocation from the staging buffer itself: it is the buffer
    # that gets bound to this memory, not the destination buffer.
    var stagingRequirements: VkMemoryRequirements
    vkGetBufferMemoryRequirements(vulkan.device, stagingBuffer, addr(stagingRequirements))
    var
      stagingMemory: VkDeviceMemory
      stagingPtr: pointer
      memoryAllocationInfo = VkMemoryAllocateInfo(
        sType: VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        allocationSize: stagingRequirements.size,
        memoryTypeIndex: GetDirectMemoryTypeIndex(),
      )
    checkVkResult vkAllocateMemory(
      vulkan.device,
      addr(memoryAllocationInfo),
      nil,
      addr(stagingMemory),
    )
    checkVkResult vkBindBufferMemory(vulkan.device, stagingBuffer, stagingMemory, 0)
    checkVkResult vkMapMemory(
      device = vulkan.device,
      memory = stagingMemory,
      offset = 0'u64,
      size = VK_WHOLE_SIZE,
      flags = VkMemoryMapFlags(0),
      ppData = addr(stagingPtr)
    )
    copyMem(stagingPtr, gpuData.datapointer, gpuData.size)
    # make the CPU write visible to the device before the copy is recorded
    var stagingRange = VkMappedMemoryRange(
      sType: VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
      memory: stagingMemory,
      size: VK_WHOLE_SIZE,
    )
    checkVkResult vkFlushMappedMemoryRanges(vulkan.device, 1, addr(stagingRange))

    WithSingleUseCommandBuffer(vulkan.device, commandBuffer):
      # dstOffset places the data at this value's position inside the shared
      # destination buffer; leaving it at 0 would overwrite whatever is
      # stored at the start of that buffer
      var copyRegion = VkBufferCopy(
        srcOffset: 0,
        dstOffset: gpuData.offset,
        size: gpuData.size,
      )
      vkCmdCopyBuffer(commandBuffer, stagingBuffer, gpuData.buffer.vk, 1, addr(copyRegion))

    vkDestroyBuffer(vulkan.device, stagingBuffer, nil)
    vkFreeMemory(vulkan.device, stagingMemory, nil)

proc UpdateAllGPUBuffers[T](value: T) =
  ## Calls UpdateGPUBuffer for every field of `value` that is a GPUArray or
  ## GPUValue; all other fields are ignored.
  for member in value.fields:
    when typeof(member) is GPUData:
      UpdateGPUBuffer(member)

proc InitDescriptorSet(
  renderData: RenderData,
  layout: VkDescriptorSetLayout,
  descriptorSet: var DescriptorSet,
) =
  ## Allocates the Vulkan descriptor sets of `descriptorSet` (one per entry of
  ## `descriptorSet.vk`) from `renderData.descriptorPool` using `layout`, and
  ## points their bindings at the GPUValue and Texture fields of
  ## `descriptorSet.data`.
  ##
  ## One write is emitted per descriptor field and per set. A field visited by
  ## ForDescriptorFields that is neither a GPUValue nor a Texture stops
  ## compilation.

  # sanity checks
  for name, value in descriptorSet.data.fieldPairs:
    when typeof(value) is GPUValue:
      assert value.buffer.vk.Valid
    # TODO:
    # when typeof(value) is Texture:
    # assert value.texture.vk.Valid

  # allocate
  var layouts = newSeqWith(descriptorSet.vk.len, layout)
  var allocInfo = VkDescriptorSetAllocateInfo(
    sType: VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
    descriptorPool: renderData.descriptorPool,
    descriptorSetCount: uint32(layouts.len),
    pSetLayouts: layouts.ToCPointer,
  )
  checkVkResult vkAllocateDescriptorSets(vulkan.device, addr(allocInfo), descriptorSet.vk.ToCPointer)

  # Count the writes up front. The info structs are referenced by address
  # from the write structs, so their storage is sized once and never resized.
  var nWrites = 0
  for member in descriptorSet.data.fields:
    when typeof(member) is GPUValue or typeof(member) is Texture:
      nWrites += descriptorSet.vk.len

  var
    bufferInfos = newSeq[VkDescriptorBufferInfo](nWrites)
    imageInfos = newSeq[VkDescriptorImageInfo](nWrites)
    descriptorSetWrites = newSeqOfCap[VkWriteDescriptorSet](nWrites)
    nBufferInfos = 0
    nImageInfos = 0

  # write
  ForDescriptorFields(descriptorSet.data, fieldName, fieldValue, descriptorType, descriptorCount, descriptorBindingNumber):
    for i in 0 ..< descriptorSet.vk.len:
      when typeof(fieldValue) is GPUValue:
        # offset and range describe the value's own region inside the buffer
        # (UpdateGPUBuffer places the data at `offset` within the buffer);
        # buffer.offset is the buffer's position inside its memory instead
        bufferInfos[nBufferInfos] = VkDescriptorBufferInfo(
          buffer: fieldValue.buffer.vk,
          offset: fieldValue.offset,
          range: fieldValue.size,
        )
        # every field gets its own write per set instead of overwriting the
        # write of the previous field
        descriptorSetWrites.add VkWriteDescriptorSet(
          sType: VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          dstSet: descriptorSet.vk[i],
          dstBinding: descriptorBindingNumber,
          dstArrayElement: uint32(0),
          descriptorType: descriptorType,
          descriptorCount: descriptorCount,
          pImageInfo: nil,
          pBufferInfo: addr(bufferInfos[nBufferInfos]),
        )
        inc nBufferInfos
      elif typeof(fieldValue) is Texture:
        imageInfos[nImageInfos] = VkDescriptorImageInfo(
          sampler: fieldValue.sampler,
          imageView: fieldValue.imageView,
          imageLayout: VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        )
        descriptorSetWrites.add VkWriteDescriptorSet(
          sType: VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          dstSet: descriptorSet.vk[i],
          dstBinding: descriptorBindingNumber,
          dstArrayElement: uint32(0),
          descriptorType: descriptorType,
          descriptorCount: descriptorCount,
          pImageInfo: addr(imageInfos[nImageInfos]),
          pBufferInfo: nil,
        )
        inc nImageInfos
      else:
        {.error: "Unsupported descriptor type: " & tt.name(typeof(fieldValue)).}

  # nothing to update for a data object without descriptor fields
  if descriptorSetWrites.len > 0:
    vkUpdateDescriptorSets(vulkan.device, descriptorSetWrites.len.uint32, descriptorSetWrites.ToCPointer, 0, nil)

#[
proc WriteDescriptors[TShader, TUniforms, TGlobals](renderData: RenderData, uniforms: TUniforms, globals: TGlobals) =
  var descriptorSetWrites: seq[VkWriteDescriptorSet]
  ForDescriptorFields(default(TShader), fieldName, descriptorType, descriptorCount, descriptorBindingNumber):
    for frameInFlight in 0 ..< renderData.descriptorSets.len:
      when descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        when HasGPUValueField[TUniforms](fieldName):
          WithGPUValueField(uniforms, fieldName, gpuValue):
            let bufferInfo = VkDescriptorBufferInfo(
              buffer: gpuValue.buffer.vk,
              offset: gpuValue.buffer.offset,
              range: gpuValue.buffer.size,
            )
            descriptorSetWrites.add VkWriteDescriptorSet(
              sType: VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
              dstSet: renderData.descriptorSets[frameInFlight],
              dstBinding: descriptorBindingNumber,
              dstArrayElement: uint32(0),
              descriptorType: descriptorType,
              descriptorCount: descriptorCount,
              pImageInfo: nil,
              pBufferInfo: addr(bufferInfo),
            )
        elif HasGPUValueField[TGlobals](fieldName):
          WithGPUValueField(globals, fieldName, theValue):
            let bufferInfo = VkDescriptorBufferInfo(
              buffer: theValue.buffer.vk,
              offset: theValue.buffer.offset,
              range: theValue.buffer.size,
            )
            descriptorSetWrites.add VkWriteDescriptorSet(
              sType: VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
              dstSet: renderData.descriptorSets[frameInFlight],
              dstBinding: descriptorBindingNumber,
              dstArrayElement: uint32(0),
              descriptorType: descriptorType,
              descriptorCount: descriptorCount,
              pImageInfo: nil,
              pBufferInfo: addr(bufferInfo),
            )
        else:
          {.error: "Unable to find field '" & fieldName & "' in uniforms or globals".}
      elif descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        # TODO
        let imageInfo = VkDescriptorImageInfo(
          sampler: VkSampler(0),
          imageView: VkImageView(0),
          imageLayout: VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        )
        descriptorSetWrites.add VkWriteDescriptorSet(
          sType: VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          dstSet: renderData.descriptorSets[frameInFlight],
          dstBinding: descriptorBindingNumber,
          dstArrayElement: 0'u32,
          descriptorType: descriptorType,
          descriptorCount: descriptorCount,
          pImageInfo: addr(imageInfo),
          pBufferInfo: nil,
        )
      else:
        assert false, "Unsupported descriptor type"
  vkUpdateDescriptorSets(vulkan.device, uint32(descriptorSetWrites.len), descriptorSetWrites.ToCPointer, 0, nil)
]#


converter toVkIndexType(indexType: IndexType): VkIndexType =
  ## Implicit conversion from the engine's IndexType to the matching
  ## VkIndexType constant.
  result =
    case indexType
    of None: VK_INDEX_TYPE_NONE_KHR
    of UInt8: VK_INDEX_TYPE_UINT8_EXT
    of UInt16: VK_INDEX_TYPE_UINT16
    of UInt32: VK_INDEX_TYPE_UINT32

proc CreateRenderPass(format: VkFormat): VkRenderPass =
  ## Creates a render pass on the global device with a single colour
  ## attachment of the given `format` and a single graphics subpass.
  ## The attachment is cleared on load, stored afterwards and ends in the
  ## PRESENT_SRC layout; no depth/stencil attachment is used.

  # the only attachment: one colour target
  var attachments = @[
    VkAttachmentDescription(
      format: format,
      samples: VK_SAMPLE_COUNT_1_BIT,
      loadOp: VK_ATTACHMENT_LOAD_OP_CLEAR,
      storeOp: VK_ATTACHMENT_STORE_OP_STORE,
      stencilLoadOp: VK_ATTACHMENT_LOAD_OP_DONT_CARE,
      stencilStoreOp: VK_ATTACHMENT_STORE_OP_DONT_CARE,
      initialLayout: VK_IMAGE_LAYOUT_UNDEFINED,
      finalLayout: VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    )
  ]

  # the subpass writes to attachment 0 as its colour output
  var outputs = @[
    VkAttachmentReference(
      attachment: 0,
      layout: VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    )
  ]
  var subpassesList = [
    VkSubpassDescription(
      flags: VkSubpassDescriptionFlags(0),
      pipelineBindPoint: VK_PIPELINE_BIND_POINT_GRAPHICS,
      inputAttachmentCount: 0,
      pInputAttachments: nil,
      colorAttachmentCount: uint32(outputs.len),
      pColorAttachments: outputs.ToCPointer,
      pResolveAttachments: nil,
      pDepthStencilAttachment: nil,
      preserveAttachmentCount: 0,
      pPreserveAttachments: nil,
    )
  ]

  # dependency from everything before the render pass to subpass 0
  var dependencies = @[
    VkSubpassDependency(
      srcSubpass: VK_SUBPASS_EXTERNAL,
      dstSubpass: 0,
      srcStageMask: toBits [VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT],
      srcAccessMask: toBits [VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT],
      dstStageMask: toBits [VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT],
      dstAccessMask: toBits [VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT],
    )
  ]

  var createInfo = VkRenderPassCreateInfo(
    sType: VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
    attachmentCount: uint32(attachments.len),
    pAttachments: attachments.ToCPointer,
    subpassCount: uint32(subpassesList.len),
    pSubpasses: subpassesList.ToCPointer,
    dependencyCount: uint32(dependencies.len),
    pDependencies: dependencies.ToCPointer,
  )
  checkVkResult vulkan.device.vkCreateRenderPass(addr(createInfo), nil, addr(result))

proc compileGlslToSPIRV(stage: VkShaderStageFlagBits, shaderSource: string): seq[uint32] {.compileTime.} =
  ## Compile-time helper: turns GLSL source into SPIR-V words.
  ##
  ## The output file lives in the temp directory and is named after the hash
  ## of the source and the stage. When that file does not exist yet, the
  ## bundled `glslangValidator` is run with the source on stdin; otherwise the
  ## existing file is reused. The file is then read and every four bytes are
  ## combined, little-endian, into one uint32.
  ## Returns an empty sequence when compiled with `-d:nimcheck`.
  func stage2string(stage: VkShaderStageFlagBits): string {.compileTime.} =
    # file extension / stage name understood by glslangValidator
    case stage
    of VK_SHADER_STAGE_VERTEX_BIT: "vert"
    of VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: "tesc"
    of VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: "tese"
    of VK_SHADER_STAGE_GEOMETRY_BIT: "geom"
    of VK_SHADER_STAGE_FRAGMENT_BIT: "frag"
    of VK_SHADER_STAGE_COMPUTE_BIT: "comp"
    else: ""

  when defined(nimcheck): # will not run if nimcheck is running
    return

  let stagename = stage2string(stage)
  let shaderHash = hash(shaderSource)
  let shaderfile = getTempDir() / &"shader_{shaderHash}.{stagename}"

  if shaderfile.fileExists:
    echo &"shaderfile {shaderfile} is up-to-date"
  else:
    # print the numbered source so compiler messages can be matched to lines
    echo "shader of type ", stage
    var lineNumber = 1
    for line in shaderSource.splitLines():
      echo "  ", lineNumber, " ", line
      inc lineNumber
    # var glslExe = currentSourcePath.parentDir.parentDir.parentDir / "tools" / "glslangValidator"
    var glslExe = currentSourcePath.parentDir / "tools" / "glslangValidator"
    when defined(windows):
      glslExe.add "." & ExeExt
    let command = &"{glslExe} --entry-point main -V --stdin -S {stagename} -o {shaderfile}"
    echo "run: ", command
    discard StaticExecChecked(
        command = command,
        input = shaderSource
    )

  when defined(mingw) and defined(linux): # required for crosscompilation, path separators get messed up
    let shaderbinary = staticRead shaderfile.replace("\\", "/")
  else:
    let shaderbinary = staticRead shaderfile

  # assemble little-endian 32-bit words from the raw bytes
  for offset in countup(0, shaderbinary.len - 1, 4):
    let
      byte0 = uint32(shaderbinary[offset + 0])
      byte1 = uint32(shaderbinary[offset + 1])
      byte2 = uint32(shaderbinary[offset + 2])
      byte3 = uint32(shaderbinary[offset + 3])
    result.add(byte0 or (byte1 shl 8) or (byte2 shl 16) or (byte3 shl 24))

proc generateShaderSource[TShader](shader: TShader): (string, string) {.compileTime.} =
  ## Builds the complete GLSL source of the vertex shader (result[0]) and the
  ## fragment shader (result[1]) from the fields of `shader`.
  ##
  ## Each field contributes declarations according to its pragma or type:
  ##   - VertexAttribute / InstanceAttribute: vertex shader `in`
  ##   - Pass / PassFlat: vertex shader `out` and fragment shader `in`
  ##   - ShaderOutput: fragment shader `out`
  ##   - DescriptorSet: one `uniform` block or sampler per field of its data
  ##   - fields named "vertexCode" / "fragmentCode": appended verbatim as the
  ##     shader bodies
  ## Any other field stops compilation.
  const GLSL_VERSION = "450"
  var vsInput: seq[string]
  var vsOutput: seq[string]
  var fsInput: seq[string]
  var fsOutput: seq[string]
  var uniforms: seq[string]
  var samplers: seq[string]
  var vsInputLocation = 0'u32
  var passLocation = 0
  var fsOutputLocation = 0

  # index of the descriptor set currently being emitted (the `set=` value)
  var descriptorSetCount = 0
  for fieldname, value in fieldPairs(shader):
    # vertex shader inputs
    when hasCustomPragma(value, VertexAttribute) or hasCustomPragma(value, InstanceAttribute):
      assert typeof(value) is SupportedGPUType
      vsInput.add "layout(location = " & $vsInputLocation & ") in " & GlslType(value) & " " & fieldname & ";"
      # advance the location by (number of descriptors) * (slots per descriptor)
      for j in 0 ..< NumberOfVertexInputAttributeDescriptors(value):
        vsInputLocation += NLocationSlots(value)

    # intermediate values, passed between shaders
    elif hasCustomPragma(value, Pass) or hasCustomPragma(value, PassFlat):
      let flat = if hasCustomPragma(value, PassFlat): "flat " else: ""
      vsOutput.add "layout(location = " & $passLocation & ") " & flat & "out " & GlslType(value) & " " & fieldname & ";"
      fsInput.add "layout(location = " & $passLocation & ") " & flat & "in " & GlslType(value) & " " & fieldname & ";"
      passLocation.inc

    # fragment shader output
    elif hasCustomPragma(value, ShaderOutput):
      fsOutput.add &"layout(location = " & $fsOutputLocation & ") out " & GlslType(value) & " " & fieldname & ";"
      fsOutputLocation.inc

    # descriptor sets
    # need to consider 4 cases: uniform block, texture, uniform block array, texture array
    elif typeof(value) is DescriptorSet:
      assert descriptorSetCount <= DescriptorSetType.high.int, &"{tt.name(TShader)}: maximum {DescriptorSetType.high} allowed"

      # bindings are numbered from 0 within each set
      # NOTE(review): ForDescriptorFields starts its binding counter at 1 - confirm which is intended
      var descriptorBinding = 0
      for descriptorName, descriptorValue in fieldPairs(value.data):

        when typeof(descriptorValue) is Texture:
          samplers.add "layout(set=" & $descriptorSetCount & ", binding = " & $descriptorBinding & ") uniform " & GlslType(descriptorValue) & " " & descriptorName & ";"
          descriptorBinding.inc

        elif typeof(descriptorValue) is GPUValue:
          # uniform block: the block type is named "T" & field name, the instance carries the field name
          uniforms.add "layout(set=" & $descriptorSetCount & ", binding = " & $descriptorBinding & ") uniform T" & descriptorName & " {"
          when typeof(descriptorValue.data) is object:
            for blockFieldName, blockFieldValue in descriptorValue.data.fieldPairs():
              assert typeof(blockFieldValue) is SupportedGPUType, "uniform block field '" & blockFieldName & "' is not a SupportedGPUType"
              uniforms.add "  " & GlslType(blockFieldValue) & " " & blockFieldName & ";"
            uniforms.add "} " & descriptorName & ";"
          elif typeof(descriptorValue.data) is array:
            # array of blocks: members come from the element type, the instance gets an array suffix
            for blockFieldName, blockFieldValue in default(elementType(descriptorValue.data)).fieldPairs():
              assert typeof(blockFieldValue) is SupportedGPUType, "uniform block field '" & blockFieldName & "' is not a SupportedGPUType"
              uniforms.add "  " & GlslType(blockFieldValue) & " " & blockFieldName & ";"
            uniforms.add "} " & descriptorName & "[" & $descriptorValue.data.len & "];"
          descriptorBinding.inc
        elif typeof(descriptorValue) is array:
          when elementType(descriptorValue) is Texture:
            let arrayDecl = "[" & $typeof(descriptorValue).len & "]"
            samplers.add "layout(set=" & $descriptorSetCount & ", binding = " & $descriptorBinding & ") uniform " & GlslType(default(elementType(descriptorValue))) & " " & descriptorName & "" & arrayDecl & ";"
            descriptorBinding.inc
          else:
            {.error: "Unsupported shader descriptor field " & descriptorName.}
      descriptorSetCount.inc
    elif fieldname in ["vertexCode", "fragmentCode"]:
      discard
    else:
      {.error: "Unsupported shader field '" & tt.name(TShader) & "." & fieldname & "' of type " & tt.name(typeof(value)).}

  # vertex shader: header, inputs, uniforms, samplers, outputs, then the user code
  result[0] = (@[&"#version {GLSL_VERSION}", "#extension GL_EXT_scalar_block_layout : require", ""] &
    vsInput &
    uniforms &
    samplers &
    vsOutput &
    @[shader.vertexCode]).join("\n")

  # fragment shader: same layout, with the pass-through inputs and the outputs
  result[1] = (@[&"#version {GLSL_VERSION}", "#extension GL_EXT_scalar_block_layout : require", ""] &
    fsInput &
    uniforms &
    samplers &
    fsOutput &
    @[shader.fragmentCode]).join("\n")

proc CompileShader[TShader](shader: static TShader): ShaderObject[TShader] =
  # Generates the GLSL for both stages from the (compile-time known) shader
  # value, compiles each stage to SPIR-V and wraps the binaries in Vulkan
  # shader modules. Returns the pair of module handles.
  const (vsSource, fsSource) = generateShaderSource(shader)

  let spirvVertex = compileGlslToSPIRV(VK_SHADER_STAGE_VERTEX_BIT, vsSource)
  let spirvFragment = compileGlslToSPIRV(VK_SHADER_STAGE_FRAGMENT_BIT, fsSource)

  # creates one shader module from a SPIR-V binary and stores the handle in `target`
  template createModule(binary, target: untyped) =
    var moduleInfo = VkShaderModuleCreateInfo(
      sType: VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      # codeSize is given in bytes, the binary is counted in 32 bit words
      codeSize: csize_t(binary.len * sizeof(uint32)),
      pCode: binary.ToCPointer,
    )
    checkVkResult vulkan.device.vkCreateShaderModule(addr(moduleInfo), nil, addr(target))

  createModule(spirvVertex, result.vertexShader)
  createModule(spirvFragment, result.fragmentShader)


# Builds a graphics pipeline for the shader type TShader:
#   1. one descriptor set layout per DescriptorSet field of TShader
#   2. a pipeline layout referencing all entries of result.descriptorSetLayouts
#   3. vertex + fragment stage (entry point "main") from the given shader modules
#   4. vertex input bindings/attributes derived from the shader's vertex data fields
#   5. fixed function state (no depth/stencil state, alpha blending enabled,
#      viewport and scissor are dynamic state)
# The resulting pipeline handle is stored in result.vk, the layout in result.layout.
# NOTE(review): `descriptorPoolLimit` is not referenced anywhere in this proc.
proc CreatePipeline[TShader](
  renderPass: VkRenderPass,
  shader: ShaderObject[TShader],
  topology: VkPrimitiveTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
  polygonMode: VkPolygonMode = VK_POLYGON_MODE_FILL,
  cullMode: VkCullModeFlagBits = VK_CULL_MODE_BACK_BIT,
  frontFace: VkFrontFace = VK_FRONT_FACE_CLOCKWISE,
  descriptorPoolLimit = 1024
): Pipeline[TShader] =
  # create pipeline

  # descriptor set layouts: one layout per DescriptorSet field, stored at the
  # index given by the set's sType
  for theFieldname, value in fieldPairs(default(TShader)):
    when typeof(value) is DescriptorSet:
      var layoutbindings: seq[VkDescriptorSetLayoutBinding]
      ForDescriptorFields(value.data, fieldName, fieldValue, descriptorType, descriptorCount, descriptorBindingNumber):
        layoutbindings.add VkDescriptorSetLayoutBinding(
          binding: descriptorBindingNumber,
          descriptorType: descriptorType,
          descriptorCount: descriptorCount,
          stageFlags: VkShaderStageFlags(VK_SHADER_STAGE_ALL_GRAPHICS),
          pImmutableSamplers: nil,
        )
      var layoutCreateInfo = VkDescriptorSetLayoutCreateInfo(
        sType: VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        bindingCount: layoutbindings.len.uint32,
        pBindings: layoutbindings.ToCPointer
      )
      checkVkResult vkCreateDescriptorSetLayout(
        vulkan.device,
        addr(layoutCreateInfo),
        nil,
        addr(result.descriptorSetLayouts[value.sType])
      )
  # the pipeline layout always references every slot of descriptorSetLayouts
  # NOTE(review): slots without a matching DescriptorSet field in TShader are
  # never written above - confirm that this is intended
  let pipelineLayoutInfo = VkPipelineLayoutCreateInfo(
    sType: VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    setLayoutCount: result.descriptorSetLayouts.len.uint32,
    pSetLayouts: result.descriptorSetLayouts.ToCPointer,
    # pushConstantRangeCount: uint32(pushConstants.len),
      # pPushConstantRanges: pushConstants.ToCPointer,
  )
  checkVkResult vkCreatePipelineLayout(vulkan.device, addr(pipelineLayoutInfo), nil, addr(result.layout))

  # shader stages: vertex and fragment, both with entry point "main"
  let stages = [
    VkPipelineShaderStageCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      stage: VK_SHADER_STAGE_VERTEX_BIT,
      module: shader.vertexShader,
      pName: "main",
    ),
    VkPipelineShaderStageCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      stage: VK_SHADER_STAGE_FRAGMENT_BIT,
      module: shader.fragmentShader,
      pName: "main",
    ),
  ]
  # vertex input: every field visited by ForVertexDataFields gets its own
  # binding (non-interleaved), the stride is the size of one element
  var
    bindings: seq[VkVertexInputBindingDescription]
    attributes: seq[VkVertexInputAttributeDescription]
  var inputBindingNumber = 0'u32
  var location = 0'u32
  ForVertexDataFields(default(TShader), fieldname, value, isInstanceAttr):
    bindings.add VkVertexInputBindingDescription(
      binding: inputBindingNumber,
      stride: sizeof(value).uint32,
      inputRate: if isInstanceAttr: VK_VERTEX_INPUT_RATE_INSTANCE else: VK_VERTEX_INPUT_RATE_VERTEX,
    )
    # allows to submit larger data structures like Mat44, for most other types will be 1
    let perDescriptorSize = sizeof(value).uint32 div NumberOfVertexInputAttributeDescriptors(value)
    # one attribute description per part of the value, each part advances
    # the shader location by NLocationSlots(value)
    for i in 0'u32 ..< NumberOfVertexInputAttributeDescriptors(value):
      attributes.add VkVertexInputAttributeDescription(
        binding: inputBindingNumber,
        location: location,
        format: VkType(value),
        offset: i * perDescriptorSize,
      )
      location += NLocationSlots(value)
    inc inputBindingNumber

  # fixed function state
  let
    vertexInputInfo = VkPipelineVertexInputStateCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      vertexBindingDescriptionCount: uint32(bindings.len),
      pVertexBindingDescriptions: bindings.ToCPointer,
      vertexAttributeDescriptionCount: uint32(attributes.len),
      pVertexAttributeDescriptions: attributes.ToCPointer,
    )
    inputAssembly = VkPipelineInputAssemblyStateCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
      topology: topology,
      primitiveRestartEnable: false,
    )
    # only the counts are given here, viewport and scissor are dynamic state (see below)
    viewportState = VkPipelineViewportStateCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
      viewportCount: 1,
      scissorCount: 1,
    )
    rasterizer = VkPipelineRasterizationStateCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
      depthClampEnable: VK_FALSE,
      rasterizerDiscardEnable: VK_FALSE,
      polygonMode: polygonMode,
      lineWidth: 1.0,
      cullMode: toBits [cullMode],
      frontFace: frontFace,
      depthBiasEnable: VK_FALSE,
      depthBiasConstantFactor: 0.0,
      depthBiasClamp: 0.0,
      depthBiasSlopeFactor: 0.0,
    )
    # single-sampled, no sample shading
    multisampling = VkPipelineMultisampleStateCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
      sampleShadingEnable: VK_FALSE,
      rasterizationSamples: VK_SAMPLE_COUNT_1_BIT,
      minSampleShading: 1.0,
      pSampleMask: nil,
      alphaToCoverageEnable: VK_FALSE,
      alphaToOneEnable: VK_FALSE,
    )
    # color: src * srcAlpha + dst * (1 - srcAlpha), alpha: source alpha is kept
    colorBlendAttachment = VkPipelineColorBlendAttachmentState(
      colorWriteMask: toBits [VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT],
      blendEnable: VK_TRUE,
      srcColorBlendFactor: VK_BLEND_FACTOR_SRC_ALPHA,
      dstColorBlendFactor: VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
      colorBlendOp: VK_BLEND_OP_ADD,
      srcAlphaBlendFactor: VK_BLEND_FACTOR_ONE,
      dstAlphaBlendFactor: VK_BLEND_FACTOR_ZERO,
      alphaBlendOp: VK_BLEND_OP_ADD,
    )
    colorBlending = VkPipelineColorBlendStateCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      logicOpEnable: false,
      attachmentCount: 1,
      pAttachments: addr(colorBlendAttachment),
    )
    dynamicStates = [VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR]
    dynamicState = VkPipelineDynamicStateCreateInfo(
      sType: VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
      dynamicStateCount: dynamicStates.len.uint32,
      pDynamicStates: dynamicStates.ToCPointer,
    )
  # assemble everything, the pipeline is created for subpass 0 of renderPass,
  # without depth/stencil state and without a base pipeline
  let createInfo = VkGraphicsPipelineCreateInfo(
    sType: VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
    stageCount: 2,
    pStages: stages.ToCPointer,
    pVertexInputState: addr(vertexInputInfo),
    pInputAssemblyState: addr(inputAssembly),
    pViewportState: addr(viewportState),
    pRasterizationState: addr(rasterizer),
    pMultisampleState: addr(multisampling),
    pDepthStencilState: nil,
    pColorBlendState: addr(colorBlending),
    pDynamicState: addr(dynamicState),
    layout: result.layout,
    renderPass: renderPass,
    subpass: 0,
    basePipelineHandle: VkPipeline(0),
    basePipelineIndex: -1,
  )
  checkVkResult vkCreateGraphicsPipelines(
    vulkan.device,
    VkPipelineCache(0),
    1,
    addr(createInfo),
    nil,
    addr(result.vk)
  )

proc AllocateIndirectMemory(size: uint64): IndirectGPUMemory =
  # Allocates `size` bytes of device memory.
  # Preferred is the memory type that sits on the biggest heap and does NOT have
  # VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT (needsTransfer = true). If no such type
  # exists, the type on the biggest heap overall is used (needsTransfer = false).
  const noMemoryType = high(uint32)

  result.size = size
  result.needsTransfer = true

  # query what the physical device has to offer
  var memoryProperties: VkPhysicalDeviceMemoryProperties
  vkGetPhysicalDeviceMemoryProperties(vulkan.physicalDevice, addr memoryProperties)

  var
    largestHeapSize = 0'u64
    chosenType = noMemoryType

  # first pass: only memory types that are not host-visible
  for typeIndex in 0'u32 ..< memoryProperties.memoryTypeCount:
    if VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT in toEnums(memoryProperties.memoryTypes[typeIndex].propertyFlags):
      continue
    let heapSize = memoryProperties.memoryHeaps[memoryProperties.memoryTypes[typeIndex].heapIndex].size
    if heapSize > largestHeapSize:
      largestHeapSize = heapSize
      chosenType = typeIndex

  # second pass: no device-only memory type was found, take the biggest overall
  if chosenType == noMemoryType:
    result.needsTransfer = false
    for typeIndex in 0'u32 ..< memoryProperties.memoryTypeCount:
      let heapSize = memoryProperties.memoryHeaps[memoryProperties.memoryTypes[typeIndex].heapIndex].size
      if heapSize > largestHeapSize:
        largestHeapSize = heapSize
        chosenType = typeIndex

  assert chosenType != noMemoryType, "Unable to find indirect memory type"

  var allocateInfo = VkMemoryAllocateInfo(
    sType: VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    allocationSize: result.size,
    memoryTypeIndex: chosenType,
  )
  checkVkResult vkAllocateMemory(vulkan.device, addr allocateInfo, nil, addr result.vk)

proc AllocateDirectMemory(size: uint64): DirectGPUMemory =
  # Allocates `size` bytes of host-visible memory and maps the whole range
  # into result.data.
  # The memory type is the host-visible type that sits on the biggest heap.
  # result.needsFlush is true when that type is not host-coherent (and stays
  # true if no type was selected at all).
  result.size = size
  result.needsFlush = true

  # find a good memory type
  var physicalProperties: VkPhysicalDeviceMemoryProperties
  vkGetPhysicalDeviceMemoryProperties(vulkan.physicalDevice, addr physicalProperties)

  var biggestHeap: uint64 = 0
  var memoryTypeIndex = high(uint32)
  # try to find host-visible type
  for i in 0'u32 ..< physicalProperties.memoryTypeCount:
    let flags = toEnums(physicalProperties.memoryTypes[i].propertyFlags)
    if VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT in flags:
      let heapSize = physicalProperties.memoryHeaps[physicalProperties.memoryTypes[i].heapIndex].size
      if heapSize > biggestHeap:
        biggestHeap = heapSize
        memoryTypeIndex = i
        result.needsFlush = not (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in flags)

  assert memoryTypeIndex != high(uint32), "Unable to find direct memory type"
  var allocationInfo = VkMemoryAllocateInfo(
    sType: VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    allocationSize: result.size,
    # must be the very type selected above, needsFlush was derived from its flags
    memoryTypeIndex: memoryTypeIndex,
  )
  checkVkResult vkAllocateMemory(
    vulkan.device,
    addr allocationInfo,
    nil,
    addr result.vk
  )
  # map the complete allocation, it stays mapped
  checkVkResult vkMapMemory(
    device = vulkan.device,
    memory = result.vk,
    offset = 0'u64,
    size = result.size,
    flags = VkMemoryMapFlags(0),
    ppData = addr(result.data)
  )

proc AllocateIndirectBuffer(renderData: var RenderData, size: uint64, btype: BufferType) =
  # Creates a buffer of `size` bytes for usage `btype` inside the first entry
  # of renderData.indirectMemory that has enough unused space, binds it to that
  # memory and registers it in renderData.indirectBuffers.
  # Does nothing for size == 0, fails an assertion if no memory area fits.
  if size == 0:
    return
  var buffer = Buffer[IndirectGPUMemory](size: size)

  let usageFlags = case btype:
    of VertexBuffer: [VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT]
    of IndexBuffer: [VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT]
    of UniformBuffer: [VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT]

  # iterate through memory areas to find big enough free space
  # TODO: dynamically expand memory allocations
  for (memory, usedOffset) in renderData.indirectMemory.mitems:
    # usedOffset is rounded up to MEMORY_ALIGNMENT after every allocation and can
    # end up beyond memory.size; unsigned subtraction would wrap around in that
    # case and report a huge amount of free space, so check the order first
    if usedOffset <= memory.size and memory.size - usedOffset >= size:
      buffer.offset = usedOffset
      # create buffer
      var createInfo = VkBufferCreateInfo(
        sType: VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        flags: VkBufferCreateFlags(0),
        size: buffer.size,
        usage: toBits(usageFlags),
        sharingMode: VK_SHARING_MODE_EXCLUSIVE,
      )
      checkVkResult vkCreateBuffer(
        device = vulkan.device,
        pCreateInfo = addr createInfo,
        pAllocator = nil,
        pBuffer = addr(buffer.vk)
      )
      checkVkResult vkBindBufferMemory(vulkan.device, buffer.vk, memory.vk, buffer.offset)
      # third tuple element is the used offset inside the new buffer
      renderData.indirectBuffers.add (buffer, btype, 0'u64)
      # update memory area offset
      usedOffset = alignedTo(usedOffset + size, MEMORY_ALIGNMENT)
      return

  assert false, "Did not find allocated memory region with enough space"

proc AllocateDirectBuffer(renderData: var RenderData, size: uint64, btype: BufferType) =
  # Creates a buffer of `size` bytes for usage `btype` inside the first entry
  # of renderData.directMemory that has enough unused space, binds it to that
  # memory and registers it in renderData.directBuffers.
  # Does nothing for size == 0, fails an assertion if no memory area fits.
  if size == 0:
    return

  var buffer = Buffer[DirectGPUMemory](size: size)

  let usageFlags = case btype:
    of VertexBuffer: [VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT]
    of IndexBuffer: [VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT]
    of UniformBuffer: [VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT]

  # iterate through memory areas to find big enough free space
  # TODO: dynamically expand memory allocations
  for (memory, usedOffset) in renderData.directMemory.mitems:
    # usedOffset is rounded up to MEMORY_ALIGNMENT after every allocation and can
    # end up beyond memory.size; unsigned subtraction would wrap around in that
    # case and report a huge amount of free space, so check the order first
    if usedOffset <= memory.size and memory.size - usedOffset >= size:
      buffer.offset = usedOffset
      # create buffer
      var createInfo = VkBufferCreateInfo(
        sType: VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        flags: VkBufferCreateFlags(0),
        size: buffer.size,
        usage: toBits(usageFlags),
        sharingMode: VK_SHARING_MODE_EXCLUSIVE,
      )
      checkVkResult vkCreateBuffer(
        device = vulkan.device,
        pCreateInfo = addr createInfo,
        pAllocator = nil,
        pBuffer = addr(buffer.vk)
      )
      checkVkResult vkBindBufferMemory(vulkan.device, buffer.vk, memory.vk, buffer.offset)
      # third tuple element is the used offset inside the new buffer
      renderData.directBuffers.add (buffer, btype, 0'u64)
      # update memory area offset
      usedOffset = alignedTo(usedOffset + size, MEMORY_ALIGNMENT)
      return

  assert false, "Did not find allocated memory region with enough space"

proc InitRenderData(descriptorPoolLimit = 1024'u32): RenderData =
  # Creates the descriptor pool and reserves one initial memory area for
  # indirect and one for direct memory.
  # `descriptorPoolLimit` is used as the maximum number of descriptor sets as
  # well as the number of descriptors per supported descriptor type.

  # TODO: make this more dynamic or something?
  const initialAllocationSize = 1_000_000_000'u64

  # descriptor pool: combined image samplers and uniform buffers
  var descriptorCounts = [
    VkDescriptorPoolSize(thetype: VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, descriptorCount: descriptorPoolLimit),
    VkDescriptorPoolSize(thetype: VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, descriptorCount: descriptorPoolLimit),
  ]
  var poolCreateInfo = VkDescriptorPoolCreateInfo(
    sType: VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
    poolSizeCount: uint32(descriptorCounts.len),
    pPoolSizes: descriptorCounts.ToCPointer,
    maxSets: descriptorPoolLimit,
  )
  checkVkResult vulkan.device.vkCreateDescriptorPool(addr(poolCreateInfo), nil, addr(result.descriptorPool))

  # memory areas, nothing is used yet so both start at offset 0
  result.indirectMemory = @[(memory: AllocateIndirectMemory(size = initialAllocationSize), usedOffset: 0'u64)]
  result.directMemory = @[(memory: AllocateDirectMemory(size = initialAllocationSize), usedOffset: 0'u64)]

proc FlushDirectMemory(renderData: RenderData) =
  # Flushes the used part (offset 0 up to usedOffset) of every direct memory
  # area in a single call. Areas without any used bytes are left out, and no
  # Vulkan call is made when there is nothing to flush.
  var pendingRanges = newSeqOfCap[VkMappedMemoryRange](renderData.directMemory.len)
  for area in renderData.directMemory:
    if area.usedOffset == 0:
      continue
    pendingRanges.add VkMappedMemoryRange(
      sType: VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
      memory: area.memory.vk,
      size: area.usedOffset,
    )
  if pendingRanges.len == 0:
    return
  checkVkResult vkFlushMappedMemoryRanges(vulkan.device, uint32(pendingRanges.len), pendingRanges.ToCPointer())

# For the Get*BufferSizes procs:
# BUFFER_ALIGNMENT is added per field as a rough estimate, to ensure we have enough space to align when binding

# Sums up `size + BUFFER_ALIGNMENT` of all fields of `data` that
#   - are not marked with the VertexIndices pragma
#   - are of type GPUData
#   - use indirect memory (UsesIndirectMemory is evaluated at compile time)
# The field selection happens at compile time, only the sizes are read at runtime.
proc GetIndirectBufferSizes[T](data: T): uint64 =
  for name, value in fieldPairs(data):
    when not hasCustomPragma(value, VertexIndices):
      when typeof(value) is GPUData:
        when UsesIndirectMemory(value):
          result += value.size + BUFFER_ALIGNMENT
# Overload for descriptor sets: operates on the wrapped `data` object
proc GetIndirectBufferSizes(data: DescriptorSet): uint64 =
  GetIndirectBufferSizes(data.data)
# Sums up `size + BUFFER_ALIGNMENT` of all fields of `data` that
#   - are not marked with the VertexIndices pragma
#   - are of type GPUData
#   - use direct memory (UsesDirectMemory is evaluated at compile time)
# BUFFER_ALIGNMENT is added per field to leave room for aligning the offsets.
proc GetDirectBufferSizes[T](data: T): uint64 =
  for name, value in fieldPairs(data):
    when not hasCustomPragma(value, VertexIndices):
      when typeof(value) is GPUData:
        when UsesDirectMemory(value):
          result += value.size + BUFFER_ALIGNMENT
# Overload for descriptor sets: operates on the wrapped `data` object
proc GetDirectBufferSizes(data: DescriptorSet): uint64 =
  GetDirectBufferSizes(data.data)
# Sums up `size + BUFFER_ALIGNMENT` of all fields of `data` that are marked
# with the VertexIndices pragma and use indirect memory.
# Fails at compile time if such a field is not a GPUArray or if its element
# type is not one of uint8, uint16, uint32.
proc GetIndirectIndexBufferSizes[T](data: T): uint64 =
  for name, value in fieldPairs(data):
    when hasCustomPragma(value, VertexIndices):
      static: assert typeof(value) is GPUArray, "Index buffers must be of type GPUArray"
      static: assert elementType(value.data) is uint8 or elementType(value.data) is uint16 or elementType(value.data) is uint32
      when UsesIndirectMemory(value):
        result += value.size + BUFFER_ALIGNMENT
# Sums up `size + BUFFER_ALIGNMENT` of all fields of `data` that are marked
# with the VertexIndices pragma and use direct memory.
# Fails at compile time if such a field is not a GPUArray or if its element
# type is not one of uint8, uint16, uint32.
proc GetDirectIndexBufferSizes[T](data: T): uint64 =
  for name, value in fieldPairs(data):
    when hasCustomPragma(value, VertexIndices):
      static: assert typeof(value) is GPUArray, "Index buffers must be of type GPUArray"
      static: assert elementType(value.data) is uint8 or elementType(value.data) is uint16 or elementType(value.data) is uint32
      when UsesDirectMemory(value):
        result += value.size + BUFFER_ALIGNMENT

# Assigns a region of an already created indirect buffer of type `btype` to
# every GPUData field of `data` that uses indirect memory.
# Fields marked with the VertexIndices pragma are only handled when `btype`
# is IndexBuffer, all other fields only when it is not.
# The first buffer of matching type with enough unused space is taken, its
# used offset is advanced (aligned to BUFFER_ALIGNMENT). Fails an assertion
# if no buffer fits or if the field already has a buffer.
proc AssignIndirectBuffers[T](renderdata: var RenderData, btype: BufferType, data: var T) =
  for name, value in fieldPairs(data):
    when typeof(value) is GPUData:
      when UsesIndirectMemory(value):
        # find next buffer of correct type with enough free space
        if btype == IndexBuffer == value.hasCustomPragma(VertexIndices):
          var foundBuffer = false
          for (buffer, bt, offset) in renderData.indirectBuffers.mitems:
            # offset is rounded up to BUFFER_ALIGNMENT and may end up beyond
            # buffer.size; unsigned subtraction would wrap around in that case,
            # so check the order before subtracting
            if bt == btype and offset <= buffer.size and buffer.size - offset >= value.size:
              assert not value.buffer.vk.Valid, "GPUData-Buffer has already been assigned"
              assert buffer.vk.Valid, "RenderData-Buffer has not yet been created"
              value.buffer = buffer
              value.offset = offset
              offset = alignedTo(offset + value.size, BUFFER_ALIGNMENT)
              foundBuffer = true
              break
          assert foundBuffer, &"Unable to find large enough '{btype}' for '{data}'"
# Overload for descriptor sets: operates on the wrapped `data` object
proc AssignIndirectBuffers(renderdata: var RenderData, btype: BufferType, data: var DescriptorSet) =
  AssignIndirectBuffers(renderdata, btype, data.data)
# Assigns a region of an already created direct buffer of type `btype` to
# every GPUData field of `data` that uses direct memory.
# Fields marked with the VertexIndices pragma are only handled when `btype`
# is IndexBuffer, all other fields only when it is not.
# The first buffer of matching type with enough unused space is taken, its
# used offset is advanced (aligned to BUFFER_ALIGNMENT). Fails an assertion
# if no buffer fits or if the field already has a buffer.
proc AssignDirectBuffers[T](renderdata: var RenderData, btype: BufferType, data: var T) =
  for name, value in fieldPairs(data):
    when typeof(value) is GPUData:
      when UsesDirectMemory(value):
        # find next buffer of correct type with enough free space
        if btype == IndexBuffer == value.hasCustomPragma(VertexIndices):
          var foundBuffer = false
          for (buffer, bt, offset) in renderData.directBuffers.mitems:
            # offset is rounded up to BUFFER_ALIGNMENT and may end up beyond
            # buffer.size; unsigned subtraction would wrap around in that case,
            # so check the order before subtracting
            if bt == btype and offset <= buffer.size and buffer.size - offset >= value.size:
              assert not value.buffer.vk.Valid, "GPUData-Buffer has already been assigned"
              assert buffer.vk.Valid, "RenderData-Buffer has not yet been created"
              value.buffer = buffer
              value.offset = offset
              offset = alignedTo(offset + value.size, BUFFER_ALIGNMENT)
              foundBuffer = true
              break
          assert foundBuffer, &"Unable to find large enough '{btype}' for '{data}'"
# Overload for descriptor sets: operates on the wrapped `data` object
proc AssignDirectBuffers(renderdata: var RenderData, btype: BufferType, data: var DescriptorSet) =
  AssignDirectBuffers(renderdata, btype, data.data)

proc HasGPUValueField[T](name: static string): bool {.compileTime.} =
  # Compile-time check: does T have a field called `name` that is a GPUValue?
  result = false
  for key, field in fieldPairs(default(T)):
    when typeof(field) is GPUValue:
      when key == name:
        return true

# Executes `body` for the field of `obj` whose name equals `name`.
# Inside `body` the value of that field is available as an immutable (let)
# variable with the identifier passed as `fieldvalue`.
# If `obj` has no field with that name, `body` is not expanded at all.
template WithGPUValueField(obj: object, name: static string, fieldvalue, body: untyped): untyped =
  # HasGPUValueField MUST be used to check if this is supported
  for fieldname, value in obj.fieldPairs():
    when fieldname == name:
      # own scope, so the injected identifier does not leak out of the expansion
      block:
        let `fieldvalue` {.inject.} = value
        body

proc Bind[T](pipeline: Pipeline[T], commandBuffer: VkCommandBuffer, currentFrameInFlight: int) =
  # Records the command that binds `pipeline` as graphics pipeline into
  # `commandBuffer`. Descriptor set binding is disabled (see below), therefore
  # `currentFrameInFlight` is currently not used.
  vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.vk)
  #[
  commandBuffer.vkCmdBindDescriptorSets(
    VK_PIPELINE_BIND_POINT_GRAPHICS,
    pipeline.layout,
    0,
    1,
    addr pipeline.descriptorSets[currentFrameInFlight],
    0,
    nil,
  )
  ]#

# Checks that the data types handed to a draw call match the shader type:
#   - every VertexAttribute field of TShader must exist exactly once in TMesh,
#     as GPUArray with the same element type
#   - every InstanceAttribute field of TShader must exist exactly once in
#     TInstance, as GPUArray with the same element type
#   - a DescriptorSet field with sType GlobalSet must be of type TGlobals,
#     one with sType MaterialSet must be of type TMaterial
# All checks are assertions (Render runs this proc inside a `static` block).
proc AssertCompatible(TShader, TMesh, TInstance, TGlobals, TMaterial: typedesc) =
  var descriptorSetCount = 0

  for shaderAttributeName, shaderAttribute in default(TShader).fieldPairs:
    var foundField = false

    # Vertex input data
    when hasCustomPragma(shaderAttribute, VertexAttribute):
      assert typeof(shaderAttribute) is SupportedGPUType
      for meshName, meshValue in default(TMesh).fieldPairs:
        when meshName == shaderAttributeName:
          assert meshValue is GPUArray, "Mesh attribute '" & meshName & "' must be of type 'GPUArray' but is of type " & tt.name(typeof(meshValue))
          assert foundField == false, "Shader input '" & tt.name(TShader) & "." & shaderAttributeName & "' has been found more than once"
          assert elementType(meshValue.data) is typeof(shaderAttribute), "Shader input " & tt.name(TShader) & "." & shaderAttributeName & " is of type '" & tt.name(typeof(shaderAttribute)) & "' but mesh attribute is of type '" & tt.name(elementType(meshValue.data)) & "'"
          foundField = true
      assert foundField, "Shader input '" & tt.name(TShader) & "." & shaderAttributeName & ": " & tt.name(typeof(shaderAttribute)) & "' not found in '" & tt.name(TMesh) & "'"

    # Instance input data
    elif hasCustomPragma(shaderAttribute, InstanceAttribute):
      assert typeof(shaderAttribute) is SupportedGPUType
      for instanceName, instanceValue in default(TInstance).fieldPairs:
        when instanceName == shaderAttributeName:
          # report the type of the field itself (the field name is always a string)
          assert instanceValue is GPUArray, "Instance attribute '" & instanceName & "' must be of type 'GPUArray' but is of type " & tt.name(typeof(instanceValue))
          assert foundField == false, "Shader input '" & tt.name(TShader) & "." & shaderAttributeName & "' has been found more than once"
          assert elementType(instanceValue.data) is typeof(shaderAttribute), "Shader input " & tt.name(TShader) & "." & shaderAttributeName & " is of type '" & tt.name(typeof(shaderAttribute)) & "' but instance attribute is of type '" & tt.name(elementType(instanceValue.data)) & "'"
          foundField = true
      assert foundField, "Shader input '" & tt.name(TShader) & "." & shaderAttributeName & ": " & tt.name(typeof(shaderAttribute)) & "' not found in '" & tt.name(TInstance) & "'"

    # descriptors
    elif typeof(shaderAttribute) is DescriptorSet:
      # the counter is checked before it is incremented, so at most
      # DescriptorSetType.high.int + 1 descriptor sets pass this check
      assert descriptorSetCount <= DescriptorSetType.high.int, &"{tt.name(TShader)}: maximum {DescriptorSetType.high} allowed"
      descriptorSetCount.inc


      when shaderAttribute.sType == GlobalSet:
        assert shaderAttribute.sType == default(TGlobals).sType, "Shader has global descriptor set of type '" & $shaderAttribute.sType & "' but matching provided type is '" & $default(TGlobals).sType & "'"
        assert typeof(shaderAttribute) is TGlobals, "Shader has global descriptor set type '" & tt.name(get(genericParams(typeof(shaderAttribute)), 0)) & "' but provided type is " & tt.name(TGlobals)
      elif shaderAttribute.sType == MaterialSet:
        assert shaderAttribute.sType == default(TMaterial).sType, "Shader has material descriptor set of type '" & $shaderAttribute.sType & "' but matching provided type is '" & $default(TMaterial).sType & "'"
        assert typeof(shaderAttribute) is TMaterial, "Shader has materialdescriptor type '" & tt.name(get(genericParams(typeof(shaderAttribute)), 0)) & "' but provided type is " & tt.name(TMaterial)


# Draw call entry point for one mesh with its instances.
# Currently this proc only verifies at compile time that the types of the
# global set, material set, mesh and instance data are compatible with the
# shader type of the pipeline. The recording of the actual bind and draw
# commands is still commented out, so nothing is written to `commandBuffer`.
proc Render[TShader, TGlobals, TMaterial, TMesh, TInstance](
  commandBuffer: VkCommandBuffer,
  pipeline: Pipeline[TShader],
  globalSet: TGlobals,
  materialSet: TMaterial,
  mesh: TMesh,
  instances: TInstance,
) =
  # evaluated by the compiler for every instantiation of this proc
  static: AssertCompatible(TShader, TMesh, TInstance, TGlobals, TMaterial)
  #[
  if renderable.vertexBuffers.len > 0:
    commandBuffer.vkCmdBindVertexBuffers(
      firstBinding = 0'u32,
      bindingCount = uint32(renderable.vertexBuffers.len),
      pBuffers = renderable.vertexBuffers.ToCPointer(),
      pOffsets = renderable.bufferOffsets.ToCPointer()
    )
  if renderable.indexType != None:
    commandBuffer.vkCmdBindIndexBuffer(
      renderable.indexBuffer,
      renderable.indexBufferOffset,
      renderable.indexType,
    )
    commandBuffer.vkCmdDrawIndexed(
      indexCount = renderable.indexCount,
      instanceCount = renderable.instanceCount,
      firstIndex = 0,
      vertexOffset = 0,
      firstInstance = 0
    )
  else:
    commandBuffer.vkCmdDraw(
      vertexCount = renderable.vertexCount,
      instanceCount = renderable.instanceCount,
      firstVertex = 0,
      firstInstance = 0
    )
    ]#

when isMainModule:
  import semicongine/platform/window
  import semicongine/vulkan/instance
  import semicongine/vulkan/device
  import semicongine/vulkan/physicaldevice
  import std/options

  type
    MeshA = object
      position: GPUArray[Vec3f, IndirectGPUMemory]
      indices {.VertexIndices.}: GPUArray[uint16, IndirectGPUMemory]
    InstanceA = object
      rotation: GPUArray[Vec4f, IndirectGPUMemory]
      objPosition: GPUArray[Vec3f, IndirectGPUMemory]
    MaterialA = object
      reflection: float32
      baseColor: Vec3f
    UniformsA = object
      defaultTexture: Texture[3, IndirectGPUMemory]
      defaultMaterial: GPUValue[MaterialA, IndirectGPUMemory]
      materials: GPUValue[array[3, MaterialA], IndirectGPUMemory]
      materialTextures: array[3, Texture[3, IndirectGPUMemory]]
    ShaderSettings = object
      gamma: float32
    GlobalsA = object
      fontAtlas: Texture[1, IndirectGPUMemory]
      settings: GPUValue[ShaderSettings, IndirectGPUMemory]

    ShaderA = object
      # vertex input
      position {.VertexAttribute.}: Vec3f
      objPosition {.InstanceAttribute.}: Vec3f
      rotation {.InstanceAttribute.}: Vec4f
      # intermediate
      test {.Pass.}: float32
      test1 {.PassFlat.}: Vec3f
      # output
      color {.ShaderOutput.}: Vec4f
      # descriptor sets
      globals: DescriptorSet[GlobalsA, GlobalSet]
      uniforms: DescriptorSet[UniformsA, MaterialSet]
      # code
      vertexCode: string = "void main() {}"
      fragmentCode: string = "void main() {}"

  let w = CreateWindow("test2")
  putEnv("VK_LAYER_ENABLES", "VALIDATION_CHECK_ENABLE_VENDOR_SPECIFIC_AMD,VALIDATION_CHECK_ENABLE_VENDOR_SPECIFIC_NVIDIA,VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXTVK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT")

  # TODO: remove those ugly wrappers
  let theInstance = w.CreateInstance(
    vulkanVersion = VK_MAKE_API_VERSION(0, 1, 3, 0),
    instanceExtensions = @[],
    layers = @["VK_LAYER_KHRONOS_validation"],
  )

  let dev = theInstance.CreateDevice(
    theInstance.GetPhysicalDevices().FilterBestGraphics(),
    enabledExtensions = @[],
    theInstance.GetPhysicalDevices().FilterBestGraphics().FilterForGraphicsPresentationQueues()
  ).vk
  let frameWidth = 100'u32
  let frameHeight = 100'u32

  # TODO: pack this stuff into a setup method and condense everything a bit
  let pDevice = theInstance.vk.GetPhysicalDevice()
  let qfi = pDevice.GetQueueFamily(VK_QUEUE_GRAPHICS_BIT)
  vulkan = VulkanGlobals(
    instance: theInstance.vk,
    device: dev,
    physicalDevice: pDevice,
    queueFamilyIndex: qfi,
    queue: dev.GetQueue(qfi, VK_QUEUE_GRAPHICS_BIT)
  )

  var myMesh1 = MeshA(
    position: GPUArray[Vec3f, IndirectGPUMemory](data: @[NewVec3f(0, 0, ), NewVec3f(0, 0, ), NewVec3f(0, 0, )]),
  )
  var uniforms1 = DescriptorSet[UniformsA, MaterialSet](
    data: UniformsA(
      materials: GPUValue[array[3, MaterialA], IndirectGPUMemory](data: [
        MaterialA(reflection: 0, baseColor: NewVec3f(1, 0, 0)),
        MaterialA(reflection: 0.1, baseColor: NewVec3f(0, 1, 0)),
        MaterialA(reflection: 0.5, baseColor: NewVec3f(0, 0, 1)),
    ]),
    materialTextures: [
      Texture[3, IndirectGPUMemory](),
      Texture[3, IndirectGPUMemory](),
      Texture[3, IndirectGPUMemory](),
    ]
  )
  )
  var instances1 = InstanceA(
    rotation: GPUArray[Vec4f, IndirectGPUMemory](data: @[NewVec4f(1, 0, 0, 0.1), NewVec4f(0, 1, 0, 0.1)]),
    objPosition: GPUArray[Vec3f, IndirectGPUMemory](data: @[NewVec3f(0, 0, 0), NewVec3f(1, 1, 1)]),
  )
  var myGlobals = DescriptorSet[GlobalsA, GlobalSet]()

  # setup for rendering (TODO: swapchain & framebuffers)
  let renderpass = CreateRenderPass(GetSurfaceFormat())

  # shaders
  const shader = ShaderA()
  let shaderObject = CompileShader(shader)
  var pipeline1 = CreatePipeline(renderPass = renderpass, shader = shaderObject)

  var renderdata = InitRenderData()

  # TODO: Textures
  # upload all textures
  # write descriptors for textures and uniform buffers

  # buffer allocation
  var
    indirectVertexSizes = 0'u64
    directVertexSizes = 0'u64
    indirectIndexSizes = 0'u64
    directIndexSizes = 0'u64
    indirectUniformSizes = 0'u64
    directUniformSizes = 0'u64

  # buffer allocation

  echo "Allocating GPU buffers"
  indirectVertexSizes += GetIndirectBufferSizes(myMesh1)
  indirectVertexSizes += GetIndirectBufferSizes(instances1)
  AllocateIndirectBuffer(renderdata, indirectVertexSizes, VertexBuffer)

  directVertexSizes += GetDirectBufferSizes(myMesh1)
  directVertexSizes += GetDirectBufferSizes(instances1)
  AllocateDirectBuffer(renderdata, directVertexSizes, VertexBuffer)

  indirectIndexSizes += GetIndirectIndexBufferSizes(myMesh1)
  AllocateIndirectBuffer(renderdata, indirectIndexSizes, IndexBuffer)

  directIndexSizes += GetDirectIndexBufferSizes(myMesh1)
  AllocateDirectBuffer(renderdata, directIndexSizes, IndexBuffer)

  indirectUniformSizes += GetIndirectBufferSizes(uniforms1)
  indirectUniformSizes += GetIndirectBufferSizes(myGlobals)
  AllocateIndirectBuffer(renderdata, indirectUniformSizes, UniformBuffer)

  directUniformSizes += GetDirectBufferSizes(uniforms1)
  directUniformSizes += GetDirectBufferSizes(myGlobals)
  AllocateDirectBuffer(renderdata, directUniformSizes, UniformBuffer)

  # buffer assignment
  #
  echo "Assigning buffers to GPUData fields"

  # for meshes we do:
  renderdata.AssignIndirectBuffers(VertexBuffer, myMesh1)
  renderdata.AssignDirectBuffers(VertexBuffer, myMesh1)
  renderdata.AssignIndirectBuffers(IndexBuffer, myMesh1)
  renderdata.AssignDirectBuffers(IndexBuffer, myMesh1)

  # for instances we do:
  renderdata.AssignIndirectBuffers(VertexBuffer, instances1)
  renderdata.AssignDirectBuffers(VertexBuffer, instances1)

  # for uniforms/globals we do:
  renderdata.AssignIndirectBuffers(UniformBuffer, uniforms1)
  renderdata.AssignDirectBuffers(UniformBuffer, uniforms1)
  renderdata.AssignIndirectBuffers(UniformBuffer, myGlobals)
  renderdata.AssignDirectBuffers(UniformBuffer, myGlobals)

  # buffer filling

  echo "Copying all data to GPU memory"

  # copy everything to GPU
  UpdateAllGPUBuffers(myMesh1)
  UpdateAllGPUBuffers(instances1)
  UpdateAllGPUBuffers(uniforms1)
  UpdateAllGPUBuffers(myGlobals)
  renderdata.FlushDirectMemory()


  # descriptors
  # TODO: I think we can write and assign descriptors directly after creation
  InitDescriptorSet(renderdata, pipeline1.descriptorSetLayouts[GlobalSet], myGlobals)
  InitDescriptorSet(renderdata, pipeline1.descriptorSetLayouts[MaterialSet], uniforms1)
  # WriteDescriptors[ShaderA, UniformsA, GlobalsA](renderdata, uniforms1, myGlobals)


  # command buffer
  # Create a command pool on the queue family stored in `vulkan`. The
  # RESET_COMMAND_BUFFER flag permits resetting command buffers individually,
  # which the vkResetCommandBuffer call further down relies on.
  var
    commandBufferPool: VkCommandPool
    createInfo = VkCommandPoolCreateInfo(
      sType: VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      flags: toBits [VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT],
      queueFamilyIndex: vulkan.queueFamilyIndex,
    )
  checkVkResult vkCreateCommandPool(vulkan.device, addr createInfo, nil, addr commandBufferPool)
  # Allocate INFLIGHTFRAMES primary command buffers from that pool in one call;
  # the array length and commandBufferCount are both derived from INFLIGHTFRAMES.
  var
    cmdBuffers: array[INFLIGHTFRAMES.int, VkCommandBuffer]
    allocInfo = VkCommandBufferAllocateInfo(
      sType: VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      commandPool: commandBufferPool,
      level: VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      commandBufferCount: INFLIGHTFRAMES,
    )
  checkVkResult vkAllocateCommandBuffers(vulkan.device, addr allocInfo, cmdBuffers.ToCPointer)

  # start command buffer
  # Records one frame's worth of commands into the command buffer that belongs
  # to the selected in-flight frame slot.
  block:
    let
      currentFramebuffer = VkFramebuffer(0) # TODO: placeholder null handle, no real framebuffer yet
      currentFrameInFlight = 1 # hard-coded frame slot; indexes cmdBuffers, so it must stay below INFLIGHTFRAMES
      cmd = cmdBuffers[currentFrameInFlight]
      beginInfo = VkCommandBufferBeginInfo(
        sType: VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        flags: VkCommandBufferUsageFlags(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT),
      )
    # drop whatever was recorded before (no reset flags), then open the buffer
    # for recording
    checkVkResult cmd.vkResetCommandBuffer(VkCommandBufferResetFlags(0))
    checkVkResult cmd.vkBeginCommandBuffer(addr(beginInfo))

    # record the render pass
    block:
      # A single rectangle spanning the whole frame; it is used both as the
      # render area of the pass and as the scissor rectangle.
      let frameRect = VkRect2D(
        offset: VkOffset2D(x: 0, y: 0),
        extent: VkExtent2D(width: frameWidth, height: frameHeight),
      )
      # These need to be `var` because their addresses are handed to Vulkan.
      var
        clearValues = [VkClearValue(color: VkClearColorValue(float32: [0, 0, 0, 0]))]
        fullViewport = VkViewport(
          x: 0.0,
          y: 0.0,
          width: frameWidth.float32,
          height: frameHeight.float32,
          minDepth: 0.0,
          maxDepth: 1.0,
        )
        fullScissor = frameRect
        passBeginInfo = VkRenderPassBeginInfo(
          sType: VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
          renderPass: renderpass,
          framebuffer: currentFramebuffer, # TODO
          renderArea: frameRect,
          clearValueCount: uint32(clearValues.len),
          pClearValues: clearValues.ToCPointer(),
        )
      vkCmdBeginRenderPass(cmd, addr(passBeginInfo), VK_SUBPASS_CONTENTS_INLINE)

      # viewport and scissor both cover the full frame
      vkCmdSetViewport(cmd, firstViewport = 0, viewportCount = 1, addr(fullViewport))
      vkCmdSetScissor(cmd, firstScissor = 0, scissorCount = 1, addr(fullScissor))

      # per-pipeline work (meant to become a loop over pipelines)
      block:
        Bind(pipeline1, cmd, currentFrameInFlight = currentFrameInFlight)

        # per-object work (meant to become a loop over renderable objects)
        block:
          Render(cmd, pipeline1, myGlobals, uniforms1, myMesh1, instances1)

      vkCmdEndRenderPass(cmd)
    # finish recording; checkVkResult handles the VkResult returned by the call
    checkVkResult cmd.vkEndCommandBuffer()