# HG changeset patch # User Sam # Date 1682761843 -25200 # Node ID f7e7af33e9ee403ac8cb786406aa6ad4dfbf7074 # Parent 83e990b0eb6a46dc7e895c60b0514f8008620c70 did: refactor memory selection diff -r 83e990b0eb6a -r f7e7af33e9ee examples/E10_pong.nim --- a/examples/E10_pong.nim Thu Apr 27 21:16:13 2023 +0700 +++ b/examples/E10_pong.nim Sat Apr 29 16:50:43 2023 +0700 @@ -33,9 +33,9 @@ const vertexInput = @[ - attr[Vec3f]("position", memoryLocation=VRAM), - attr[Vec3f]("color", memoryLocation=VRAMVisible), - attr[Mat4]("transform", memoryLocation=VRAMVisible, perInstance=true), + attr[Vec3f]("position"), + attr[Vec3f]("color", memoryPerformanceHint=PreferFastWrite), + attr[Mat4]("transform", memoryPerformanceHint=PreferFastWrite, perInstance=true), ] vertexOutput = @[attr[Vec3f]("outcolor")] uniforms = @[attr[Mat4]("projection")] diff -r 83e990b0eb6a -r f7e7af33e9ee src/semicongine/gpu_data.nim --- a/src/semicongine/gpu_data.nim Thu Apr 27 21:16:13 2023 +0700 +++ b/src/semicongine/gpu_data.nim Sat Apr 29 16:50:43 2023 +0700 @@ -139,13 +139,13 @@ of Mat43F64: mat43f64: seq[TMat43[float64]] of Mat4F32: mat4f32*: seq[TMat4[float32]] of Mat4F64: mat4f64: seq[TMat4[float64]] - MemoryLocation* = enum - VRAM, VRAMVisible, RAM # VRAM is fastest, VRAMVisible allows updating memory directly, may be slower + MemoryPerformanceHint* = enum + PreferFastRead, PreferFastWrite ShaderAttribute* = object name*: string thetype*: DataType perInstance*: bool - memoryLocation*: MemoryLocation + memoryPerformanceHint*: MemoryPerformanceHint func vertexInputs*(attributes: seq[ShaderAttribute]): seq[ShaderAttribute] = for attr in attributes: @@ -280,13 +280,13 @@ func attr*[T: GPUType]( name: string, perInstance=false, - memoryLocation=VRAMVisible, + memoryPerformanceHint=PreferFastRead, ): auto = ShaderAttribute( name: name, thetype: getDataType[T](), perInstance: perInstance, - memoryLocation: memoryLocation, + memoryPerformanceHint: memoryPerformanceHint, ) func get*[T: GPUType|int|uint|float](value: DataValue): T = @@ -824,12 +824,6 @@ of Mat4F32: "mat4" of Mat4F64: "dmat4" -func groupByMemoryLocation*(attributes: openArray[ShaderAttribute]): Table[MemoryLocation, seq[ShaderAttribute]] = - for attr in attributes: - if not (attr.memoryLocation in result): - result[attr.memoryLocation] = @[] - result[attr.memoryLocation].add attr - func glslInput*(group: seq[ShaderAttribute]): seq[string] = if group.len == 0: return @[] diff -r 83e990b0eb6a -r f7e7af33e9ee src/semicongine/renderer.nim --- a/src/semicongine/renderer.nim Thu Apr 27 21:16:13 2023 +0700 +++ b/src/semicongine/renderer.nim Sat Apr 29 16:50:43 2023 +0700 @@ -5,6 +5,7 @@ import ./vulkan/api import ./vulkan/buffer +import ./vulkan/memory import ./vulkan/device import ./vulkan/drawable import ./vulkan/pipeline @@ -20,9 +21,9 @@ type SceneData = object drawables*: OrderedTable[Mesh, Drawable] - vertexBuffers*: Table[MemoryLocation, Buffer] + vertexBuffers*: Table[MemoryPerformanceHint, Buffer] indexBuffer*: Buffer - attributeLocation*: Table[string, MemoryLocation] + attributeLocation*: Table[string, MemoryPerformanceHint] attributeBindingNumber*: Table[string, int] transformAttribute: string # name of attribute that is used for per-instance mesh transformation entityTransformationCache: Table[Mesh, Mat4] # remembers last transformation, avoid to send GPU-updates if no changes @@ -85,42 +86,42 @@ data.indexBuffer = renderer.device.createBuffer( size=indicesBufferSize, usage=[VK_BUFFER_USAGE_INDEX_BUFFER_BIT], + requireMappable=false, preferVRAM=true, - requiresMapping=false, ) # one vertex data buffer per memory location var - perLocationOffsets: Table[MemoryLocation, uint64] - perLocationSizes: Table[MemoryLocation, uint64] + perLocationOffsets: Table[MemoryPerformanceHint, uint64] + perLocationSizes: Table[MemoryPerformanceHint, uint64] bindingNumber = 0 for attribute in inputs: - data.attributeLocation[attribute.name] = attribute.memoryLocation + data.attributeLocation[attribute.name] = attribute.memoryPerformanceHint data.attributeBindingNumber[attribute.name] = bindingNumber inc bindingNumber # setup one buffer per attribute-location-type - if not (attribute.memoryLocation in perLocationSizes): - perLocationSizes[attribute.memoryLocation] = 0'u64 + if not (attribute.memoryPerformanceHint in perLocationSizes): + perLocationSizes[attribute.memoryPerformanceHint] = 0'u64 for mesh in allMeshes: - perLocationSizes[attribute.memoryLocation] += mesh.dataSize(attribute.name) - for location, bufferSize in perLocationSizes.pairs: + perLocationSizes[attribute.memoryPerformanceHint] += mesh.dataSize(attribute.name) + for memoryPerformanceHint, bufferSize in perLocationSizes.pairs: if bufferSize > 0: - data.vertexBuffers[location] = renderer.device.createBuffer( + data.vertexBuffers[memoryPerformanceHint] = renderer.device.createBuffer( size=bufferSize, usage=[VK_BUFFER_USAGE_VERTEX_BUFFER_BIT], - preferVRAM=location in [VRAM, VRAMVisible], - requiresMapping=location in [VRAMVisible, RAM], + requireMappable=memoryPerformanceHint==PreferFastWrite, + preferVRAM=true, ) - perLocationOffsets[location] = 0 + perLocationOffsets[memoryPerformanceHint] = 0 var indexBufferOffset = 0'u64 for mesh in allMeshes: - var offsets: seq[(MemoryLocation, uint64)] + var offsets: seq[(MemoryPerformanceHint, uint64)] for attribute in inputs: - offsets.add (attribute.memoryLocation, perLocationOffsets[attribute.memoryLocation]) + offsets.add (attribute.memoryPerformanceHint, perLocationOffsets[attribute.memoryPerformanceHint]) var (pdata, size) = mesh.getRawData(attribute.name) - data.vertexBuffers[attribute.memoryLocation].setData(pdata, size, perLocationOffsets[attribute.memoryLocation]) - perLocationOffsets[attribute.memoryLocation] += size + data.vertexBuffers[attribute.memoryPerformanceHint].setData(pdata, size, perLocationOffsets[attribute.memoryPerformanceHint]) + perLocationOffsets[attribute.memoryPerformanceHint] += size let indexed = mesh.indexType != None var drawable = Drawable( @@ -150,9 +151,9 @@ proc refreshMeshAttributeData(sceneData: var SceneData, mesh: Mesh, attribute: string) = debug &"Refreshing data on mesh {mesh} for {attribute}" var (pdata, size) = mesh.getRawData(attribute) - let memoryLocation = sceneData.attributeLocation[attribute] + let memoryPerformanceHint = sceneData.attributeLocation[attribute] let bindingNumber = sceneData.attributeBindingNumber[attribute] - sceneData.vertexBuffers[memoryLocation].setData(pdata, size, sceneData.drawables[mesh].bufferOffsets[bindingNumber][1]) + sceneData.vertexBuffers[memoryPerformanceHint].setData(pdata, size, sceneData.drawables[mesh].bufferOffsets[bindingNumber][1]) proc refreshMeshData*(renderer: var Renderer, scene: Entity) = diff -r 83e990b0eb6a -r f7e7af33e9ee src/semicongine/vulkan/buffer.nim --- a/src/semicongine/vulkan/buffer.nim Thu Apr 27 21:16:13 2023 +0700 +++ b/src/semicongine/vulkan/buffer.nim Sat Apr 29 16:50:43 2023 +0700 @@ -20,6 +20,11 @@ of false: discard of true: memory*: DeviceMemory + MemoryRequirements = object + size: uint64 + alignment: uint64 + memoryTypes: seq[MemoryType] + proc `==`*(a, b: Buffer): bool = a.vk == b.vk @@ -27,39 +32,43 @@ func `$`*(buffer: Buffer): string = &"Buffer(vk: {buffer.vk}, size: {buffer.size}, usage: {buffer.usage})" +proc requirements(buffer: Buffer): MemoryRequirements = + assert buffer.vk.valid + assert buffer.device.vk.valid + var req: VkMemoryRequirements + buffer.device.vk.vkGetBufferMemoryRequirements(buffer.vk, addr req) + result.size = req.size + result.alignment = req.alignment + let memorytypes = buffer.device.physicaldevice.vk.getMemoryProperties().types + for i in 0 ..< sizeof(req.memoryTypeBits) * 8: + if ((req.memoryTypeBits shr i) and 1) == 1: + result.memoryTypes.add memorytypes[i] -proc allocateMemory(buffer: var Buffer, preferVRAM: bool, requiresMapping: bool, autoFlush: bool) = +proc allocateMemory(buffer: var Buffer, requireMappable: bool, preferVRAM: bool, preferAutoFlush: bool) = assert buffer.device.vk.valid assert buffer.memoryAllocated == false - var flags: seq[VkMemoryPropertyFlagBits] - if requiresMapping: - flags.add VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT - - if preferVRAM and buffer.device.hasMemoryWith(flags & @[VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT]): - flags.add VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - - if requiresMapping and autoFlush and buffer.device.hasMemoryWith(flags & @[VK_MEMORY_PROPERTY_HOST_COHERENT_BIT]): - flags.add VK_MEMORY_PROPERTY_HOST_COHERENT_BIT - - assert buffer.device.hasMemoryWith(flags) - + let requirements = buffer.requirements() + let memoryType = requirements.memoryTypes.selectBestMemoryType( + requireMappable=requireMappable, + preferVRAM=preferVRAM, + preferAutoFlush=preferAutoFlush + ) buffer.memoryAllocated = true - debug "Allocating memory for buffer: ", buffer.size, " bytes ", flags - buffer.memory = buffer.device.allocate(buffer.size, flags) + debug "Allocating memory for buffer: ", buffer.size, " bytes of type ", memoryType + buffer.memory = buffer.device.allocate(requirements.size, memoryType) if buffer.memory.canMap: checkVkResult buffer.device.vk.vkBindBufferMemory(buffer.vk, buffer.memory.vk, VkDeviceSize(0)) - # currently no support for extended structure and concurrent/shared use # (shardingMode = VK_SHARING_MODE_CONCURRENT not supported) proc createBuffer*( device: Device, size: uint64, usage: openArray[VkBufferUsageFlagBits], + requireMappable: bool, preferVRAM: bool, - requiresMapping: bool, - autoFlush=true, + preferAutoFlush=true, ): Buffer = assert device.vk.valid assert size > 0 @@ -67,7 +76,7 @@ result.device = device result.size = size result.usage = usage.toSeq - if not (requiresMapping or VK_BUFFER_USAGE_TRANSFER_DST_BIT in result.usage): + if not (requireMappable or VK_BUFFER_USAGE_TRANSFER_DST_BIT in result.usage): result.usage.add VK_BUFFER_USAGE_TRANSFER_DST_BIT var createInfo = VkBufferCreateInfo( sType: VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, @@ -83,7 +92,7 @@ pAllocator=nil, pBuffer=addr result.vk ) - result.allocateMemory(preferVRAM, requiresMapping, autoFlush) + result.allocateMemory(requireMappable=requireMappable, preferVRAM=preferVRAM, preferAutoFlush=preferAutoFlush) proc copy*(src, dst: Buffer) = @@ -139,7 +148,7 @@ if dst.memory.needsFlushing: dst.memory.flush() else: # use staging buffer, slower but required if memory is not host visible - var stagingBuffer = dst.device.createBuffer(size, [VK_BUFFER_USAGE_TRANSFER_SRC_BIT], preferVRAM=false, requiresMapping=true, autoFlush=true) + var stagingBuffer = dst.device.createBuffer(size, [VK_BUFFER_USAGE_TRANSFER_SRC_BIT], requireMappable=true, preferVRAM=false, preferAutoFlush=true) stagingBuffer.setData(src, size, 0) stagingBuffer.copy(dst) stagingBuffer.destroy() diff -r 83e990b0eb6a -r f7e7af33e9ee src/semicongine/vulkan/drawable.nim --- a/src/semicongine/vulkan/drawable.nim Thu Apr 27 21:16:13 2023 +0700 +++ b/src/semicongine/vulkan/drawable.nim Sat Apr 29 16:50:43 2023 +0700 @@ -5,13 +5,14 @@ import ./api import ./utils import ./buffer +import ./memory import ../gpu_data type Drawable* = object elementCount*: uint32 # number of vertices or indices - bufferOffsets*: seq[(MemoryLocation, uint64)] # list of buffers and list of offset for each attribute in that buffer + bufferOffsets*: seq[(MemoryPerformanceHint, uint64)] # list of buffers and list of offset for each attribute in that buffer instanceCount*: uint32 # number of instance case indexed*: bool of true: @@ -26,14 +27,14 @@ else: &"Drawable(elementCount: {drawable.elementCount}, instanceCount: {drawable.instanceCount}, bufferOffsets: {drawable.bufferOffsets})" -proc draw*(commandBuffer: VkCommandBuffer, drawable: Drawable, vertexBuffers: Table[MemoryLocation, Buffer], indexBuffer: BUffer) = +proc draw*(commandBuffer: VkCommandBuffer, drawable: Drawable, vertexBuffers: Table[MemoryPerformanceHint, Buffer], indexBuffer: BUffer) = debug "Draw ", drawable var buffers: seq[VkBuffer] var offsets: seq[VkDeviceSize] - for (location, offset) in drawable.bufferOffsets: - buffers.add vertexBuffers[location].vk + for (performanceHint, offset) in drawable.bufferOffsets: + buffers.add vertexBuffers[performanceHint].vk offsets.add VkDeviceSize(offset) commandBuffer.vkCmdBindVertexBuffers( diff -r 83e990b0eb6a -r f7e7af33e9ee src/semicongine/vulkan/memory.nim --- a/src/semicongine/vulkan/memory.nim Thu Apr 27 21:16:13 2023 +0700 +++ b/src/semicongine/vulkan/memory.nim Sat Apr 29 16:50:43 2023 +0700 @@ -1,5 +1,4 @@ import std/strformat -import std/algorithm import ./api import ./device @@ -9,7 +8,7 @@ size*: uint64 flags*: seq[VkMemoryHeapFlagBits] index*: uint32 - MemoryType = object + MemoryType* = object heap*: MemoryHeap flags*: seq[VkMemoryPropertyFlagBits] index*: uint32 @@ -26,7 +25,27 @@ of true: data*: pointer needsFlushing*: bool -proc getPhysicalDeviceMemoryProperties(physicalDevice: VkPhysicalDevice): PhyscialDeviceMemoryProperties = +func `$`*(memoryType: MemoryType): string = + &"Memorytype {memoryType.flags} (heap size: {memoryType.heap.size}, heap flags: {memoryType.heap.flags})" + +proc selectBestMemoryType*(types: seq[MemoryType], requireMappable: bool, preferVRAM: bool, preferAutoFlush: bool): MemoryType = + # todo: we assume there is always at least one memory type that is mappable + assert types.len > 0 + var highestRating = 0'f + result = types[0] + for t in types: + var rating = float(t.heap.size) / 1_000_000'f # select biggest heap if all else equal + if requireMappable and VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT in t.flags: + rating += 1000 + if preferVRAM and VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT in t.flags: + rating += 500 + if preferAutoFlush and VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in t.flags: + rating += 100 + if rating > highestRating: + highestRating = rating + result = t + +proc getMemoryProperties*(physicalDevice: VkPhysicalDevice): PhyscialDeviceMemoryProperties = var physicalProperties: VkPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties(physicalDevice, addr physicalProperties) for i in 0 ..< physicalProperties.memoryHeapCount: @@ -42,39 +61,13 @@ index: i, ) -proc hasMemoryWith*(device: Device, requiredFlags: openArray[VkMemoryPropertyFlagBits]): bool = - for mtype in device.physicalDevice.vk.getPhysicalDeviceMemoryProperties.types: - var hasAllFlags = true - for flag in requiredFlags: - if not (flag in mtype.flags): - hasAllFlags = false - break - if hasAllFlags: - return true - -proc allocate*(device: Device, size: uint64, flags: openArray[VkMemoryPropertyFlagBits]): DeviceMemory = +proc allocate*(device: Device, size: uint64, memoryType: MemoryType): DeviceMemory = assert device.vk.valid assert size > 0 result.device = device result.size = size - - var - hasAllFlags: bool - matchingTypes: seq[MemoryType] - for mtype in device.physicalDevice.vk.getPhysicalDeviceMemoryProperties.types: - hasAllFlags = true - for flag in flags: - if not (flag in mtype.flags): - hasAllFlags = false - break - if hasAllFlags: - matchingTypes.add mtype - if matchingTypes.len == 0: - raise newException(Exception, &"No memory with support for {flags}") - matchingTypes.sort(cmp= proc(a, b: MemoryType): int = cmp(a.heap.size, b.heap.size)) - - result.memoryType = matchingTypes[^1] + result.memoryType = memoryType result.canMap = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT in result.memoryType.flags result.needsFlushing = not (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in result.memoryType.flags) diff -r 83e990b0eb6a -r f7e7af33e9ee src/semicongine/vulkan/pipeline.nim --- a/src/semicongine/vulkan/pipeline.nim Thu Apr 27 21:16:13 2023 +0700 +++ b/src/semicongine/vulkan/pipeline.nim Sat Apr 29 16:50:43 2023 +0700 @@ -50,8 +50,8 @@ var buffer = pipeline.device.createBuffer( size=uniformBufferSize, usage=[VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT], + requireMappable=true, preferVRAM=true, - requiresMapping=true, ) pipeline.uniformBuffers.add buffer pipeline.descriptorSets[i].setDescriptorSet(buffer)