changeset 156:134647ed5b60

did: refactor memory selection
author Sam <sam@basx.dev>
date Sat, 29 Apr 2023 16:50:43 +0700
parents 321d3de6c78c
children 041a114db1dc
files examples/E10_pong.nim src/semicongine/gpu_data.nim src/semicongine/renderer.nim src/semicongine/vulkan/buffer.nim src/semicongine/vulkan/drawable.nim src/semicongine/vulkan/memory.nim src/semicongine/vulkan/pipeline.nim
diffstat 7 files changed, 89 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/examples/E10_pong.nim	Thu Apr 27 21:16:13 2023 +0700
+++ b/examples/E10_pong.nim	Sat Apr 29 16:50:43 2023 +0700
@@ -33,9 +33,9 @@
 
   const
     vertexInput = @[
-      attr[Vec3f]("position", memoryLocation=VRAM),
-      attr[Vec3f]("color", memoryLocation=VRAMVisible),
-      attr[Mat4]("transform", memoryLocation=VRAMVisible, perInstance=true),
+      attr[Vec3f]("position"),
+      attr[Vec3f]("color", memoryPerformanceHint=PreferFastWrite),
+      attr[Mat4]("transform", memoryPerformanceHint=PreferFastWrite, perInstance=true),
     ]
     vertexOutput = @[attr[Vec3f]("outcolor")]
     uniforms = @[attr[Mat4]("projection")]
--- a/src/semicongine/gpu_data.nim	Thu Apr 27 21:16:13 2023 +0700
+++ b/src/semicongine/gpu_data.nim	Sat Apr 29 16:50:43 2023 +0700
@@ -139,13 +139,13 @@
     of Mat43F64: mat43f64: seq[TMat43[float64]]
     of Mat4F32: mat4f32*: seq[TMat4[float32]]
     of Mat4F64: mat4f64: seq[TMat4[float64]]
-  MemoryLocation* = enum
-    VRAM, VRAMVisible, RAM # VRAM is fastest, VRAMVisible allows updating memory directly, may be slower
+  MemoryPerformanceHint* = enum
+    PreferFastRead, PreferFastWrite
   ShaderAttribute* = object
     name*: string
     thetype*: DataType
     perInstance*: bool
-    memoryLocation*: MemoryLocation
+    memoryPerformanceHint*: MemoryPerformanceHint
 
 func vertexInputs*(attributes: seq[ShaderAttribute]): seq[ShaderAttribute] =
   for attr in attributes:
@@ -280,13 +280,13 @@
 func attr*[T: GPUType](
   name: string,
   perInstance=false,
-  memoryLocation=VRAMVisible,
+  memoryPerformanceHint=PreferFastRead,
 ): auto =
   ShaderAttribute(
     name: name,
     thetype: getDataType[T](),
     perInstance: perInstance,
-    memoryLocation: memoryLocation,
+    memoryPerformanceHint: memoryPerformanceHint,
   )
 
 func get*[T: GPUType|int|uint|float](value: DataValue): T =
@@ -824,12 +824,6 @@
     of Mat4F32: "mat4"
     of Mat4F64: "dmat4"
 
-func groupByMemoryLocation*(attributes: openArray[ShaderAttribute]): Table[MemoryLocation, seq[ShaderAttribute]] =
-  for attr in attributes:
-    if not (attr.memoryLocation in result):
-      result[attr.memoryLocation] = @[]
-    result[attr.memoryLocation].add attr
-
 func glslInput*(group: seq[ShaderAttribute]): seq[string] =
   if group.len == 0:
     return @[]
--- a/src/semicongine/renderer.nim	Thu Apr 27 21:16:13 2023 +0700
+++ b/src/semicongine/renderer.nim	Sat Apr 29 16:50:43 2023 +0700
@@ -5,6 +5,7 @@
 
 import ./vulkan/api
 import ./vulkan/buffer
+import ./vulkan/memory
 import ./vulkan/device
 import ./vulkan/drawable
 import ./vulkan/pipeline
@@ -20,9 +21,9 @@
 type
   SceneData = object
     drawables*: OrderedTable[Mesh, Drawable]
-    vertexBuffers*: Table[MemoryLocation, Buffer]
+    vertexBuffers*: Table[MemoryPerformanceHint, Buffer]
     indexBuffer*: Buffer
-    attributeLocation*: Table[string, MemoryLocation]
+    attributeLocation*: Table[string, MemoryPerformanceHint]
     attributeBindingNumber*: Table[string, int]
     transformAttribute: string # name of attribute that is used for per-instance mesh transformation
     entityTransformationCache: Table[Mesh, Mat4] # remembers last transformation, avoid to send GPU-updates if no changes
@@ -85,42 +86,42 @@
     data.indexBuffer = renderer.device.createBuffer(
       size=indicesBufferSize,
       usage=[VK_BUFFER_USAGE_INDEX_BUFFER_BIT],
+      requireMappable=false,
       preferVRAM=true,
-      requiresMapping=false,
     )
 
   # one vertex data buffer per memory location
   var
-    perLocationOffsets: Table[MemoryLocation, uint64]
-    perLocationSizes: Table[MemoryLocation, uint64]
+    perLocationOffsets: Table[MemoryPerformanceHint, uint64]
+    perLocationSizes: Table[MemoryPerformanceHint, uint64]
     bindingNumber = 0
   for attribute in inputs:
-    data.attributeLocation[attribute.name] = attribute.memoryLocation
+    data.attributeLocation[attribute.name] = attribute.memoryPerformanceHint
     data.attributeBindingNumber[attribute.name] = bindingNumber
     inc bindingNumber
     # setup one buffer per attribute-location-type
-    if not (attribute.memoryLocation in perLocationSizes):
-      perLocationSizes[attribute.memoryLocation] = 0'u64
+    if not (attribute.memoryPerformanceHint in perLocationSizes):
+      perLocationSizes[attribute.memoryPerformanceHint] = 0'u64
     for mesh in allMeshes:
-      perLocationSizes[attribute.memoryLocation] += mesh.dataSize(attribute.name)
-  for location, bufferSize in perLocationSizes.pairs:
+      perLocationSizes[attribute.memoryPerformanceHint] += mesh.dataSize(attribute.name)
+  for memoryPerformanceHint, bufferSize in perLocationSizes.pairs:
     if bufferSize > 0:
-      data.vertexBuffers[location] = renderer.device.createBuffer(
+      data.vertexBuffers[memoryPerformanceHint] = renderer.device.createBuffer(
         size=bufferSize,
         usage=[VK_BUFFER_USAGE_VERTEX_BUFFER_BIT],
-        preferVRAM=location in [VRAM, VRAMVisible],
-        requiresMapping=location in [VRAMVisible, RAM],
+        requireMappable=memoryPerformanceHint==PreferFastWrite,
+        preferVRAM=true,
       )
-      perLocationOffsets[location] = 0
+      perLocationOffsets[memoryPerformanceHint] = 0
 
   var indexBufferOffset = 0'u64
   for mesh in allMeshes:
-    var offsets: seq[(MemoryLocation, uint64)]
+    var offsets: seq[(MemoryPerformanceHint, uint64)]
     for attribute in inputs:
-      offsets.add (attribute.memoryLocation, perLocationOffsets[attribute.memoryLocation])
+      offsets.add (attribute.memoryPerformanceHint, perLocationOffsets[attribute.memoryPerformanceHint])
       var (pdata, size) = mesh.getRawData(attribute.name)
-      data.vertexBuffers[attribute.memoryLocation].setData(pdata, size, perLocationOffsets[attribute.memoryLocation])
-      perLocationOffsets[attribute.memoryLocation] += size
+      data.vertexBuffers[attribute.memoryPerformanceHint].setData(pdata, size, perLocationOffsets[attribute.memoryPerformanceHint])
+      perLocationOffsets[attribute.memoryPerformanceHint] += size
 
     let indexed = mesh.indexType != None
     var drawable = Drawable(
@@ -150,9 +151,9 @@
 proc refreshMeshAttributeData(sceneData: var SceneData, mesh: Mesh, attribute: string) =
   debug &"Refreshing data on mesh {mesh} for {attribute}"
   var (pdata, size) = mesh.getRawData(attribute)
-  let memoryLocation = sceneData.attributeLocation[attribute]
+  let memoryPerformanceHint = sceneData.attributeLocation[attribute]
   let bindingNumber = sceneData.attributeBindingNumber[attribute]
-  sceneData.vertexBuffers[memoryLocation].setData(pdata, size, sceneData.drawables[mesh].bufferOffsets[bindingNumber][1])
+  sceneData.vertexBuffers[memoryPerformanceHint].setData(pdata, size, sceneData.drawables[mesh].bufferOffsets[bindingNumber][1])
 
 
 proc refreshMeshData*(renderer: var Renderer, scene: Entity) =
--- a/src/semicongine/vulkan/buffer.nim	Thu Apr 27 21:16:13 2023 +0700
+++ b/src/semicongine/vulkan/buffer.nim	Sat Apr 29 16:50:43 2023 +0700
@@ -20,6 +20,11 @@
       of false: discard
       of true:
         memory*: DeviceMemory
+  MemoryRequirements = object
+    size: uint64
+    alignment: uint64
+    memoryTypes: seq[MemoryType]
+
 
 proc `==`*(a, b: Buffer): bool =
   a.vk == b.vk
@@ -27,39 +32,43 @@
 func `$`*(buffer: Buffer): string =
   &"Buffer(vk: {buffer.vk}, size: {buffer.size}, usage: {buffer.usage})"
 
+proc requirements(buffer: Buffer): MemoryRequirements =
+  ## Collects size, alignment and the usable memory types reported for `buffer`.
+  assert buffer.vk.valid
+  assert buffer.device.vk.valid
+  var vkReq: VkMemoryRequirements
+  buffer.device.vk.vkGetBufferMemoryRequirements(buffer.vk, addr vkReq)
+  let available = buffer.device.physicaldevice.vk.getMemoryProperties().types
+  result = MemoryRequirements(size: vkReq.size, alignment: vkReq.alignment)
+  for bit in 0 ..< sizeof(vkReq.memoryTypeBits) * 8:
+    if ((vkReq.memoryTypeBits shr bit) and 1) == 1: # bit n set => keep n-th memory type
+      result.memoryTypes.add available[bit]
 
-proc allocateMemory(buffer: var Buffer, preferVRAM: bool, requiresMapping: bool, autoFlush: bool) =
+proc allocateMemory(buffer: var Buffer, requireMappable: bool, preferVRAM: bool, preferAutoFlush: bool) =
   assert buffer.device.vk.valid
   assert buffer.memoryAllocated == false
 
-  var flags: seq[VkMemoryPropertyFlagBits]
-  if requiresMapping:
-    flags.add VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
-
-  if preferVRAM and buffer.device.hasMemoryWith(flags & @[VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT]):
-    flags.add VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
-
-  if requiresMapping and autoFlush and buffer.device.hasMemoryWith(flags & @[VK_MEMORY_PROPERTY_HOST_COHERENT_BIT]):
-    flags.add VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
-
-  assert buffer.device.hasMemoryWith(flags)
-
+  let requirements = buffer.requirements()
+  let memoryType = requirements.memoryTypes.selectBestMemoryType(
+    requireMappable=requireMappable,
+    preferVRAM=preferVRAM,
+    preferAutoFlush=preferAutoFlush
+  )
   buffer.memoryAllocated = true
-  debug "Allocating memory for buffer: ", buffer.size, " bytes ", flags
-  buffer.memory = buffer.device.allocate(buffer.size, flags)
+  debug "Allocating memory for buffer: ", buffer.size, " bytes of type ", memoryType
+  buffer.memory = buffer.device.allocate(requirements.size, memoryType)
   if buffer.memory.canMap:
     checkVkResult buffer.device.vk.vkBindBufferMemory(buffer.vk, buffer.memory.vk, VkDeviceSize(0))
 
-
 # currently no support for extended structure and concurrent/shared use
 # (shardingMode = VK_SHARING_MODE_CONCURRENT not supported)
 proc createBuffer*(
   device: Device,
   size: uint64,
   usage: openArray[VkBufferUsageFlagBits],
+  requireMappable: bool,
   preferVRAM: bool,
-  requiresMapping: bool,
-  autoFlush=true,
+  preferAutoFlush=true,
 ): Buffer =
   assert device.vk.valid
   assert size > 0
@@ -67,7 +76,7 @@
   result.device = device
   result.size = size
   result.usage = usage.toSeq
-  if not (requiresMapping or VK_BUFFER_USAGE_TRANSFER_DST_BIT in result.usage):
+  if not (requireMappable or VK_BUFFER_USAGE_TRANSFER_DST_BIT in result.usage):
     result.usage.add VK_BUFFER_USAGE_TRANSFER_DST_BIT
   var createInfo = VkBufferCreateInfo(
     sType: VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@@ -83,7 +92,7 @@
     pAllocator=nil,
     pBuffer=addr result.vk
   )
-  result.allocateMemory(preferVRAM, requiresMapping, autoFlush)
+  result.allocateMemory(requireMappable=requireMappable, preferVRAM=preferVRAM, preferAutoFlush=preferAutoFlush)
 
 
 proc copy*(src, dst: Buffer) =
@@ -139,7 +148,7 @@
     if dst.memory.needsFlushing:
       dst.memory.flush()
   else: # use staging buffer, slower but required if memory is not host visible
-    var stagingBuffer = dst.device.createBuffer(size, [VK_BUFFER_USAGE_TRANSFER_SRC_BIT], preferVRAM=false, requiresMapping=true, autoFlush=true)
+    var stagingBuffer = dst.device.createBuffer(size, [VK_BUFFER_USAGE_TRANSFER_SRC_BIT], requireMappable=true, preferVRAM=false, preferAutoFlush=true)
     stagingBuffer.setData(src, size, 0)
     stagingBuffer.copy(dst)
     stagingBuffer.destroy()
--- a/src/semicongine/vulkan/drawable.nim	Thu Apr 27 21:16:13 2023 +0700
+++ b/src/semicongine/vulkan/drawable.nim	Sat Apr 29 16:50:43 2023 +0700
@@ -5,13 +5,14 @@
 import ./api
 import ./utils
 import ./buffer
+import ./memory
 
 import ../gpu_data
 
 type
   Drawable* = object
     elementCount*: uint32 # number of vertices or indices
-    bufferOffsets*: seq[(MemoryLocation, uint64)] # list of buffers and list of offset for each attribute in that buffer
+    bufferOffsets*: seq[(MemoryPerformanceHint, uint64)] # list of buffers and list of offset for each attribute in that buffer
     instanceCount*: uint32 # number of instance
     case indexed*: bool
     of true:
@@ -26,14 +27,14 @@
   else:
     &"Drawable(elementCount: {drawable.elementCount}, instanceCount: {drawable.instanceCount}, bufferOffsets: {drawable.bufferOffsets})"
 
-proc draw*(commandBuffer: VkCommandBuffer, drawable: Drawable, vertexBuffers: Table[MemoryLocation, Buffer], indexBuffer: BUffer) =
+proc draw*(commandBuffer: VkCommandBuffer, drawable: Drawable, vertexBuffers: Table[MemoryPerformanceHint, Buffer], indexBuffer: BUffer) =
     debug "Draw ", drawable
 
     var buffers: seq[VkBuffer]
     var offsets: seq[VkDeviceSize]
 
-    for (location, offset) in drawable.bufferOffsets:
-      buffers.add vertexBuffers[location].vk
+    for (performanceHint, offset) in drawable.bufferOffsets:
+      buffers.add vertexBuffers[performanceHint].vk
       offsets.add VkDeviceSize(offset)
 
     commandBuffer.vkCmdBindVertexBuffers(
--- a/src/semicongine/vulkan/memory.nim	Thu Apr 27 21:16:13 2023 +0700
+++ b/src/semicongine/vulkan/memory.nim	Sat Apr 29 16:50:43 2023 +0700
@@ -1,5 +1,4 @@
 import std/strformat
-import std/algorithm
 
 import ./api
 import ./device
@@ -9,7 +8,7 @@
     size*: uint64
     flags*: seq[VkMemoryHeapFlagBits]
     index*: uint32
-  MemoryType = object
+  MemoryType* = object
     heap*: MemoryHeap
     flags*: seq[VkMemoryPropertyFlagBits]
     index*: uint32
@@ -26,7 +25,27 @@
       of true: data*: pointer
     needsFlushing*: bool
 
-proc getPhysicalDeviceMemoryProperties(physicalDevice: VkPhysicalDevice): PhyscialDeviceMemoryProperties =
+func `$`*(memoryType: MemoryType): string = # renders property flags plus size/flags of the backing heap
+  &"Memorytype {memoryType.flags} (heap size: {memoryType.heap.size}, heap flags: {memoryType.heap.flags})"
+
+proc selectBestMemoryType*(types: seq[MemoryType], requireMappable: bool, preferVRAM: bool, preferAutoFlush: bool): MemoryType =
+  ## Returns the best entry of `types`. With `requireMappable` only host-visible types qualify (hard filter);
+  ## candidates are then ranked by VRAM preference, coherent preference, and heap size as the final tie-breaker.
+  assert types.len > 0
+  var found = false
+  var best: tuple[vram, coherent: bool, heapSize: uint64]
+  for t in types:
+    if requireMappable and VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT notin t.flags:
+      continue # a requirement must never be outvoted by a large heap or a preference
+    let rating = (vram: preferVRAM and VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT in t.flags,
+      coherent: preferAutoFlush and VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in t.flags, heapSize: t.heap.size)
+    if not found or best < rating: # tuples compare lexicographically; first candidate wins ties
+      found = true
+      best = rating
+      result = t
+  doAssert found, "no host-visible memory type available although requireMappable was set"
+
+proc getMemoryProperties*(physicalDevice: VkPhysicalDevice): PhyscialDeviceMemoryProperties =
   var physicalProperties: VkPhysicalDeviceMemoryProperties
   vkGetPhysicalDeviceMemoryProperties(physicalDevice, addr physicalProperties)
   for i in 0 ..< physicalProperties.memoryHeapCount:
@@ -42,39 +61,13 @@
       index: i,
     )
 
-proc hasMemoryWith*(device: Device, requiredFlags: openArray[VkMemoryPropertyFlagBits]): bool =
-  for mtype in device.physicalDevice.vk.getPhysicalDeviceMemoryProperties.types:
-    var hasAllFlags = true
-    for flag in requiredFlags:
-      if not (flag in mtype.flags):
-        hasAllFlags = false
-        break
-    if hasAllFlags:
-      return true
-
-proc allocate*(device: Device, size: uint64, flags: openArray[VkMemoryPropertyFlagBits]): DeviceMemory =
+proc allocate*(device: Device, size: uint64, memoryType: MemoryType): DeviceMemory =
   assert device.vk.valid
   assert size > 0
 
   result.device = device
   result.size = size
-
-  var
-    hasAllFlags: bool
-    matchingTypes: seq[MemoryType]
-  for mtype in device.physicalDevice.vk.getPhysicalDeviceMemoryProperties.types:
-    hasAllFlags = true
-    for flag in flags:
-      if not (flag in mtype.flags):
-        hasAllFlags = false
-        break
-    if hasAllFlags:
-      matchingTypes.add mtype
-  if matchingTypes.len == 0:
-    raise newException(Exception, &"No memory with support for {flags}")
-  matchingTypes.sort(cmp= proc(a, b: MemoryType): int = cmp(a.heap.size, b.heap.size))
-
-  result.memoryType = matchingTypes[^1]
+  result.memoryType = memoryType
   result.canMap = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT in result.memoryType.flags
   result.needsFlushing = not (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in result.memoryType.flags)
 
--- a/src/semicongine/vulkan/pipeline.nim	Thu Apr 27 21:16:13 2023 +0700
+++ b/src/semicongine/vulkan/pipeline.nim	Sat Apr 29 16:50:43 2023 +0700
@@ -50,8 +50,8 @@
     var buffer = pipeline.device.createBuffer(
       size=uniformBufferSize,
       usage=[VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT],
+      requireMappable=true,
       preferVRAM=true,
-      requiresMapping=true,
     )
     pipeline.uniformBuffers.add buffer
     pipeline.descriptorSets[i].setDescriptorSet(buffer)