changeset 581:87e500bd528a

add: better mesh support, indexed mesh
author Sam <sam@basx.dev>
date Thu, 06 Apr 2023 00:30:48 +0700
parents 3c4be9e13570
children 8cf5d3f921ae
files src/semicongine/gpu_data.nim src/semicongine/mesh.nim src/semicongine/scene.nim src/semicongine/vulkan/buffer.nim src/semicongine/vulkan/memory.nim src/semicongine/vulkan/swapchain.nim tests/test_vulkan_wrapper.nim
diffstat 7 files changed, 353 insertions(+), 139 deletions(-) [+]
line wrap: on
line diff
--- a/src/semicongine/gpu_data.nim	Wed Apr 05 00:45:16 2023 +0700
+++ b/src/semicongine/gpu_data.nim	Thu Apr 06 00:30:48 2023 +0700
@@ -19,13 +19,15 @@
     UInt16
     UInt32
     UInt64
+  MemoryLocation* = enum
+    VRAM, VRAMVisible, RAM # VRAM is fastest, VRAMVisible allows updating memory directly, may be slower
   Attribute* = object
     name*: string
     thetype*: DataType
     components*: CountType # how many components the vectors has (1 means scalar)
     rows*: CountType # used to split matrices into rows of vectors
     perInstance*: bool
-    useGPULocalMemory*: bool
+    memoryLocation*: MemoryLocation
   AttributeGroup* = object
     attributes*: seq[Attribute]
 
@@ -42,8 +44,22 @@
     if attr.perInstance == false:
       result.add attr
 
-func attr*(name: string, thetype: DataType, components=CountType(1), rows=CountType(1), perInstance=false): auto =
-  Attribute(name: name, thetype: thetype, components: components, rows: rows, perInstance: perInstance)
+func attr*( # convenience constructor for an `Attribute`
+  name: string,
+  thetype: DataType,
+  components=CountType(1), # number of vector components, 1 means scalar
+  rows=CountType(1), # number of vector rows a matrix is split into
+  perInstance=false,
+  memoryLocation=VRAMVisible, # default: memory that can be updated directly
+): auto =
+  Attribute(
+    name: name,
+    thetype: thetype,
+    components: components,
+    rows: rows,
+    perInstance: perInstance,
+    memoryLocation: memoryLocation,
+  )
 
 func size*(thetype: DataType): uint32 =
   case thetype:
@@ -242,3 +258,9 @@
   for attribute in group.attributes:
     result.add &"layout(location = {i}) out {attribute.glslType} {attribute.name};"
     i += 1
+
+func groupByMemoryLocation*(attributes: openArray[Attribute]): Table[MemoryLocation, seq[Attribute]] =
+  ## Buckets `attributes` by their `memoryLocation`; inside a bucket the
+  ## attributes keep the order in which they were passed in.
+  for attribute in attributes:
+    result.mgetOrPut(attribute.memoryLocation, @[]).add attribute
--- a/src/semicongine/mesh.nim	Wed Apr 05 00:45:16 2023 +0700
+++ b/src/semicongine/mesh.nim	Thu Apr 06 00:30:48 2023 +0700
@@ -5,15 +5,17 @@
 import std/sequtils
 
 import ./vulkan/utils
+import ./vulkan/api
 import ./gpu_data
 import ./entity
 import ./math
 
 type
   SurfaceDataType = enum
-    Position, Color, Normal, Tangent, BiTangent, TextureCoordinate, Index, BigIndex
+    Position, Color, Normal, Tangent, BiTangent, TextureCoordinate
   MeshIndexType* = enum
     None
+    Tiny # up to 2^8 vertices
     Small # up to 2^16 vertices
     Big # up to 2^32 vertices
   MeshData = object
@@ -24,19 +26,28 @@
       of Tangent: tangent: seq[Vec3f]
       of BiTangent: bitangent: seq[Vec3f]
       of TextureCoordinate: texturecoord: seq[Vec2f]
-      of Index: index: seq[uint16]
-      of BigIndex: bigindex: seq[uint32]
   Mesh* = ref object of Component
     vertexCount*: uint32
     data: Table[Attribute, MeshData]
     case indexType*: MeshIndexType
-    of None:
-      discard
-    of Small:
-      smallIndices*: seq[array[3, uint16]]
-    of Big:
-      bigIndices*: seq[array[3, uint32]]
+      of None: discard
+      of Tiny: tinyIndices*: seq[array[3, uint8]]
+      of Small: smallIndices*: seq[array[3, uint16]]
+      of Big: bigIndices*: seq[array[3, uint32]]
 
+converter toVulkan*(indexType: MeshIndexType): VkIndexType = # implicit MeshIndexType -> VkIndexType mapping
+  case indexType:
+    of None: VK_INDEX_TYPE_NONE_KHR # mesh without index data
+    of Tiny: VK_INDEX_TYPE_UINT8_EXT # 8-bit indices, part of VK_EXT_index_type_uint8
+    of Small: VK_INDEX_TYPE_UINT16
+    of Big: VK_INDEX_TYPE_UINT32
+
+func indicesCount*(mesh: Mesh): uint32 = # number of single indices (3 per triangle), 0 for a non-indexed mesh
+  case mesh.indexType:
+    of None: 0'u32
+    of Tiny: uint32(mesh.tinyIndices.len * 3) # `len` is an int, convert explicitly
+    of Small: uint32(mesh.smallIndices.len * 3)
+    of Big: uint32(mesh.bigIndices.len * 3)
 
 method `$`*(mesh: Mesh): string =
   &"Mesh ({mesh.vertexCount})"
@@ -51,28 +62,70 @@
     result.data[asAttribute(default(Vec3f), "color")] = MeshData(thetype: Color, color: colors.toSeq)
 
 
-func newMesh*(positions: openArray[Vec3f], indices: openArray[array[3, uint32|int32]]): auto =
-  let meshdata = {asAttribute(default(Vec3f), "position"): MeshData(thetype: Position, position: positions.toSeq)}.toTable
-  if uint16(positions.len) < high(uint16):
-    var smallIndices = newSeq[array[3, uint16]](indices.len)
+func newMesh*(positions: openArray[Vec3f], colors: openArray[Vec3f]=[], indices: openArray[array[3, uint32|int32|int]], autoResize=true): auto =
+  assert colors.len == 0 or colors.len == positions.len
+
+  result = new Mesh
+  result.vertexCount = uint32(positions.len)
+  result.data[asAttribute(default(Vec3f), "position")] = MeshData(thetype: Position, position: positions.toSeq)
+  if colors.len > 0:
+    result.data[asAttribute(default(Vec3f), "color")] = MeshData(thetype: Color, color: colors.toSeq)
+
+  for i in indices:
+    assert uint32(i[0]) < result.vertexCount
+    assert uint32(i[1]) < result.vertexCount
+    assert uint32(i[2]) < result.vertexCount
+
+  if autoResize and uint32(positions.len) < uint32(high(uint8)) and false: # todo: check feature support
+    result.indexType = Tiny
     for i, tri in enumerate(indices):
-      smallIndices[i] = [uint16(tri[0]), uint16(tri[1]), uint16(tri[3])]
-    Mesh(vertexCount: uint32(positions.len), data: meshdata, indexType: Small, smallIndices: smallIndices)
+      result.tinyIndices.add [uint8(tri[0]), uint8(tri[1]), uint8(tri[2])]
+  elif autoResize and uint32(positions.len) < uint32(high(uint16)):
+    result.indexType = Small
+    for i, tri in enumerate(indices):
+      result.smallIndices.add [uint16(tri[0]), uint16(tri[1]), uint16(tri[2])]
   else:
-    var bigIndices = newSeq[array[3, uint32]](indices.len)
+    result.indexType = Big
     for i, tri in enumerate(indices):
-      bigIndices[i] = [uint32(tri[0]), uint32(tri[1]), uint32(tri[3])]
-    Mesh(vertexCount: uint32(positions.len), data: meshdata, indexType: Big, bigIndices: bigIndices)
+      result.bigIndices.add [uint32(tri[0]), uint32(tri[1]), uint32(tri[2])]
+
+func newMesh*(positions: openArray[Vec3f], colors: openArray[Vec3f]=[], indices: openArray[array[3, uint16|int16]]): auto =
+  assert colors.len == 0 or colors.len == positions.len
+
+  result = new Mesh
+  result.vertexCount = uint32(positions.len)
+  result.data[asAttribute(default(Vec3f), "position")] = MeshData(thetype: Position, position: positions.toSeq)
+  if colors.len > 0:
+    result.data[asAttribute(default(Vec3f), "color")] = MeshData(thetype: Color, color: colors.toSeq)
 
-func newMesh*(positions: openArray[Vec3f], indices: openArray[array[3, uint16|int16]]): auto =
-  let meshdata = {asAttribute(default(Vec3f), "position"): MeshData(thetype: Position, position: positions.toSeq)}.toTable
-  var smallIndices = newSeq[array[3, uint16]](indices.len)
+  for i in indices:
+    assert i[0] < result.vertexCount
+    assert i[1] < result.vertexCount
+    assert i[2] < result.vertexCount
+  result.indexType = Small
   for i, tri in enumerate(indices):
-    smallIndices[i] = [uint16(tri[0]), uint16(tri[1]), uint16(tri[3])]
-  Mesh(vertexCount: positions.len, data: meshdata, indexType: Small, smallIndices: smallIndices)
+    result.smallIndices.add [uint16(tri[0]), uint16(tri[1]), uint16(tri[2])]
+
+func newMesh*(positions: openArray[Vec3f], colors: openArray[Vec3f]=[], indices: openArray[array[3, uint8|int8]]): auto =
+  ## Creates a mesh with 8-bit (`Tiny`) triangle indices; `colors` is optional
+  ## but, when given, needs exactly one entry per position.
+  assert colors.len == 0 or colors.len == positions.len
+  assert false # todo: check feature support
+
+  result = new Mesh
+  result.vertexCount = uint32(positions.len)
+  result.data[asAttribute(default(Vec3f), "position")] = MeshData(thetype: Position, position: positions.toSeq)
+  if colors.len > 0:
+    result.data[asAttribute(default(Vec3f), "color")] = MeshData(thetype: Color, color: colors.toSeq)
+
+  result.indexType = Tiny
+  for tri in indices:
+    for vertexIndex in tri: # every index has to reference an existing vertex
+      assert vertexIndex < result.vertexCount
+    result.tinyIndices.add [uint8(tri[0]), uint8(tri[1]), uint8(tri[2])] # the field of the `Tiny` branch
 
 
-func size*(meshdata: MeshData): uint64 =
+func meshDataSize*(meshdata: MeshData): uint64 =
   case meshdata.thetype:
     of Position: meshdata.position.size
     of Color: meshdata.color.size
@@ -80,15 +133,20 @@
     of Tangent: meshdata.tangent.size
     of BiTangent: meshdata.bitangent.size
     of TextureCoordinate: meshdata.texturecoord.size
-    of Index: meshdata.index.size
-    of BigIndex: meshdata.bigindex.size
+
+func attributeSize*(mesh: Mesh, attribute: Attribute): uint64 =
+  mesh.data[attribute].meshDataSize
 
-func size*(mesh: Mesh, attribute: Attribute): uint64 =
-  mesh.data[attribute].size
+func vertexDataSize*(mesh: Mesh): uint64 =
+  for d in mesh.data.values:
+    result += d.meshDataSize
 
-func size*(mesh: Mesh): uint64 =
-  for d in mesh.data.values:
-    result += d.size
+func indexDataSize*(mesh: Mesh): uint64 = # size of the index data in bytes, 0 for a non-indexed mesh
+  case mesh.indexType
+    of None: 0'u64
+    of Tiny: uint64(mesh.tinyIndices.len * sizeof(get(genericParams(typeof(mesh.tinyIndices)), 0))) # triangles * bytes per triangle
+    of Small: uint64(mesh.smallIndices.len * sizeof(get(genericParams(typeof(mesh.smallIndices)), 0)))
+    of Big: uint64(mesh.bigIndices.len * sizeof(get(genericParams(typeof(mesh.bigIndices)), 0)))
 
 proc rawData[T: seq](value: var T): (pointer, uint64) =
   (pointer(addr(value[0])), uint64(sizeof(get(genericParams(typeof(value)), 0)) * value.len))
@@ -101,8 +159,13 @@
     of Tangent: rawData(data.tangent)
     of BiTangent: rawData(data.bitangent)
     of TextureCoordinate: rawData(data.texturecoord)
-    of Index: rawData(data.index)
-    of BigIndex: rawData(data.bigindex)
+
+proc getRawIndexData*(mesh: Mesh): (pointer, uint64) = # (address of the first index, size of all indices in bytes)
+  case mesh.indexType:
+    of None: raise newException(Exception, "Trying to get index data for non-indexed mesh")
+    of Tiny: rawData(mesh.tinyIndices) # rawData takes the address of element 0, so the seq must not be empty
+    of Small: rawData(mesh.smallIndices)
+    of Big: rawData(mesh.bigIndices)
 
 proc hasDataFor*(mesh: Mesh, attribute: Attribute): bool =
   assert attribute.perInstance == false, "Mesh data cannot handle per-instance attributes"
--- a/src/semicongine/scene.nim	Wed Apr 05 00:45:16 2023 +0700
+++ b/src/semicongine/scene.nim	Thu Apr 06 00:30:48 2023 +0700
@@ -19,6 +19,7 @@
     of true:
       indexBuffer*: Buffer
       indexType*: VkIndexType
+      indexOffset*: uint64
     of false:
       discard
 
@@ -29,92 +30,125 @@
 
 func `$`*(drawable: Drawable): string =
   if drawable.indexed:
-    &"Drawable(elementCount: {drawable.elementCount}, instanceCount: {drawable.instanceCount}, buffer: {drawable.buffer}, offsets: {drawable.offsets}, indexType: {drawable.indexType})"
+    &"Drawable(elementCount: {drawable.elementCount}, instanceCount: {drawable.instanceCount}, buffer: {drawable.buffer}, offsets: {drawable.offsets}, indexType: {drawable.indexType}, indexOffset: {drawable.indexOffset}, indexBuffer: {drawable.indexBuffer})"
   else:
     &"Drawable(elementCount: {drawable.elementCount}, instanceCount: {drawable.instanceCount}, buffer: {drawable.buffer}, offsets: {drawable.offsets})"
 
-proc destroy(drawable: var Drawable) =
-  drawable.buffer.destroy()
-  if drawable.indexed:
-    drawable.indexBuffer.destroy()
+proc getBuffers*(scene: Scene, pipeline: VkPipeline): seq[Buffer] =
+  ## Lists every vertex and index buffer used by the drawables of `pipeline`,
+  ## each buffer only once (buffers are told apart by their Vulkan handle).
+  var seen: seq[VkBuffer]
+  for drawable in scene.drawables[pipeline]:
+    if drawable.buffer.vk notin seen:
+      seen.add(drawable.buffer.vk); result.add(drawable.buffer)
+    if drawable.indexed and drawable.indexBuffer.vk notin seen:
+      seen.add(drawable.indexBuffer.vk); result.add(drawable.indexBuffer)
+
+proc destroy*(scene: var Scene, pipeline: VkPipeline) =
+  var buffers = scene.getBuffers(pipeline)
+  for buffer in buffers.mitems:
+      buffer.destroy()
+
+proc destroy*(scene: var Scene) =
+  for pipeline in scene.drawables.keys:
+    scene.destroy(pipeline)
 
 proc setupDrawables(scene: var Scene, pipeline: Pipeline) =
   assert pipeline.device.vk.valid
   if pipeline.vk in scene.drawables:
     for drawable in scene.drawables[pipeline.vk].mitems:
-      drawable.destroy()
+      scene.destroy(pipeline.vk)
   scene.drawables[pipeline.vk] = @[]
 
   var
-    nonIMeshes: seq[Mesh]
-    smallIMeshes: seq[Mesh]
-    bigIMeshes: seq[Mesh]
+    nonIndexedMeshes: seq[Mesh]
+    tinyIndexedMeshes: seq[Mesh]
+    smallIndexedMeshes: seq[Mesh]
+    bigIndexedMeshes: seq[Mesh]
+    allIndexedMeshes: seq[Mesh]
   for mesh in allPartsOfType[Mesh](scene.root):
     for inputAttr in pipeline.inputs.vertexInputs:
       assert mesh.hasDataFor(inputAttr), &"{mesh} missing data for {inputAttr}"
     case mesh.indexType:
-      of None: nonIMeshes.add mesh
-      of Small: smallIMeshes.add mesh
-      of Big: bigIMeshes.add mesh
+      of None: nonIndexedMeshes.add mesh
+      of Tiny: tinyIndexedMeshes.add mesh
+      of Small: smallIndexedMeshes.add mesh
+      of Big: bigIndexedMeshes.add mesh
+
+  allIndexedMeshes = bigIndexedMeshes & smallIndexedMeshes & tinyIndexedMeshes # widest index type first, so that we don't have to care about index alignment
   
-  if nonIMeshes.len > 0:
+  var
+    indicesBufferSize = 0'u64
+    indexOffset = 0'u64
+  for mesh in allIndexedMeshes:
+    indicesBufferSize += mesh.indexDataSize
+  var indexBuffer: Buffer
+  if indicesBufferSize > 0:
+    indexBuffer = pipeline.device.createBuffer(
+      size=indicesBufferSize,
+      usage=[VK_BUFFER_USAGE_INDEX_BUFFER_BIT],
+      useVRAM=true,
+      mappable=false,
+    )
+
+  for location, attributes in pipeline.inputs.vertexInputs.groupByMemoryLocation().pairs:
+    # setup one buffer per attribute location
+    var bufferSize = 0'u64
+    for mesh in nonIndexedMeshes & allIndexedMeshes:
+      bufferSize += mesh.vertexDataSize
+    if bufferSize == 0:
+      continue
     var
-      bufferSize = 0'u64
-      vertexCount = 0'u32
-    for mesh in nonIMeshes:
-      bufferSize += mesh.size
-      vertexCount += mesh.vertexCount
-    var buffer = pipeline.device.createBuffer(
+      bufferOffset = 0'u64
+      buffer = pipeline.device.createBuffer(
         size=bufferSize,
         usage=[VK_BUFFER_USAGE_VERTEX_BUFFER_BIT],
-        memoryFlags=[VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT],
+        useVRAM=location in [VRAM, VRAMVisible],
+        mappable=location in [VRAMVisible, RAM],
       )
-    var offset = 0'u64
-    var drawable = Drawable(elementCount: vertexCount, buffer: buffer, indexed: false, instanceCount: 1)
-    for inputAttr in pipeline.inputs.vertexInputs:
-      drawable.offsets.add offset
-      for mesh in nonIMeshes:
-        var (pdata, size) = mesh.getRawData(inputAttr)
-        buffer.setData(pdata, size, offset)
-        offset += size
-    scene.drawables[pipeline.vk].add drawable
+
+    # non-indexed mesh drawable
+    if nonIndexedMeshes.len > 0:
+      var vertexCount = 0'u32
+      for mesh in nonIndexedMeshes:
+        vertexCount += mesh.vertexCount
+      # remark: we merge all meshes into a single drawcall... smart?
+      # I think bad for instancing...
+      var nonIndexedDrawable = Drawable(elementCount: vertexCount, buffer: buffer, indexed: false, instanceCount: 1)
+      for inputAttr in attributes:
+        nonIndexedDrawable.offsets.add bufferOffset
+        for mesh in nonIndexedMeshes:
+          var (pdata, size) = mesh.getRawData(inputAttr)
+          buffer.setData(pdata, size, bufferOffset)
+          bufferOffset += size
+      scene.drawables[pipeline.vk].add nonIndexedDrawable
 
-#[
-proc createVertexBuffers*[M: Mesh](
-  mesh: M,
-  device: VkDevice,
-  physicalDevice: VkPhysicalDevice,
-  commandPool: VkCommandPool,
-  queue: VkQueue,
-): (seq[Buffer], uint32) =
-  result[1] = mesh.vertexData.VertexCount
-  for name, value in mesh.vertexData.fieldPairs:
-    assert value.data.len > 0
-    var flags = if value.useOnDeviceMemory: {TransferSrc} else: {VertexBuffer}
-    var stagingBuffer = device.InitBuffer(physicalDevice, value.datasize, flags, {HostVisible, HostCoherent})
-    copyMem(stagingBuffer.data, addr(value.data[0]), value.datasize)
-
-    if value.useOnDeviceMemory:
-      var finalBuffer = device.InitBuffer(physicalDevice, value.datasize, {TransferDst, VertexBuffer}, {DeviceLocal})
-      transferBuffer(commandPool, queue, stagingBuffer, finalBuffer, value.datasize)
-      stagingBuffer.trash()
-      result[0].add(finalBuffer)
-      value.buffer = finalBuffer
-    else:
-      result[0].add(stagingBuffer)
-      value.buffer = stagingBuffer
-]#
+    # indexed mesh drawable
+    for mesh in allIndexedMeshes:
+      var drawable = Drawable(
+        elementCount: mesh.indicesCount,
+        buffer: buffer,
+        indexed: true,
+        indexBuffer: indexBuffer,
+        indexOffset: indexOffset,
+        indexType: mesh.indexType,
+        instanceCount: 1
+      )
+      var (pdata, size) = mesh.getRawIndexData()
+      indexBuffer.setData(pdata, size, indexOffset)
+      indexOffset += size
+      for inputAttr in attributes:
+        drawable.offsets.add bufferOffset
+        var (pdata, size) = mesh.getRawData(inputAttr)
+        buffer.setData(pdata, size, bufferOffset)
+        bufferOffset += size
+      scene.drawables[pipeline.vk].add drawable
+  echo scene.getBuffers(pipeline.vk)
 
 proc setupDrawables*(scene: var Scene, renderPass: var RenderPass) =
   for subpass in renderPass.subpasses.mitems:
     for pipeline in subpass.pipelines.mitems:
       scene.setupDrawables(pipeline)
 
-
 proc getDrawables*(scene: Scene, pipeline: Pipeline): seq[Drawable] =
   scene.drawables.getOrDefault(pipeline.vk, @[])
-
-proc destroy*(scene: var Scene) =
-  for drawables in scene.drawables.mvalues:
-    for drawable in drawables.mitems:
-      drawable.destroy()
--- a/src/semicongine/vulkan/buffer.nim	Wed Apr 05 00:45:16 2023 +0700
+++ b/src/semicongine/vulkan/buffer.nim	Thu Apr 06 00:30:48 2023 +0700
@@ -15,32 +15,31 @@
     vk*: VkBuffer
     size*: uint64
     usage*: seq[VkBufferUsageFlagBits]
-    case hasMemory*: bool
+    case memoryAllocated*: bool
       of false: discard
       of true:
         memory*: DeviceMemory
-        data*: pointer
 
-func `$`*(buffer: Buffer): string = &"Buffer(size: {buffer.size}, usage: {buffer.usage})"
+func `$`*(buffer: Buffer): string =
+  &"Buffer(vk: {buffer.vk}, size: {buffer.size}, usage: {buffer.usage})"
 
-proc setData*(dst: Buffer, src: pointer, len: uint64, offset=0'u64) =
-  assert offset + len <= dst.size
-  copyMem(cast[pointer](cast[uint64](dst.data) + offset), src, len)
+
+proc allocateMemory(buffer: var Buffer, useVRAM: bool, mappable: bool, autoFlush: bool) =
+  assert buffer.device.vk.valid
+  assert buffer.memoryAllocated == false
 
-proc setData*[T: seq](dst: Buffer, src: ptr T, offset=0'u64) =
-  dst.setData(src, sizeof(get(genericParams(T), 0)) * src[].len, offset=offset)
-
-proc setData*[T](dst: Buffer, src: ptr T, offset=0'u64) =
-  dst.setData(src, sizeof(T), offset=offset)
+  var flags: seq[VkMemoryPropertyFlagBits]
+  if useVRAM:
+    flags.add VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+  if mappable:
+    flags.add VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+  if autoFlush:
+    flags.add VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
 
-proc allocateMemory(buffer: var Buffer, flags: openArray[VkMemoryPropertyFlagBits]) =
-  assert buffer.device.vk.valid
-  assert buffer.hasMemory == false
-
-  buffer.hasMemory = true
+  buffer.memoryAllocated = true
   buffer.memory = buffer.device.allocate(buffer.size, flags)
   checkVkResult buffer.device.vk.vkBindBufferMemory(buffer.vk, buffer.memory.vk, VkDeviceSize(0))
-  buffer.data = buffer.memory.map()
+
 
 # currently no support for extended structure and concurrent/shared use
 # (shardingMode = VK_SHARING_MODE_CONCURRENT not supported)
@@ -48,8 +47,9 @@
   device: Device,
   size: uint64,
   usage: openArray[VkBufferUsageFlagBits],
-  flags: openArray[VkBufferCreateFlagBits] = @[],
-  memoryFlags: openArray[VkMemoryPropertyFlagBits] = @[],
+  useVRAM: bool,
+  mappable: bool,
+  autoFlush=true,
 ): Buffer =
   assert device.vk.valid
   assert size > 0
@@ -57,9 +57,11 @@
   result.device = device
   result.size = size
   result.usage = usage.toSeq
+  if not (mappable or VK_BUFFER_USAGE_TRANSFER_DST_BIT in result.usage):
+    result.usage.add VK_BUFFER_USAGE_TRANSFER_DST_BIT
   var createInfo = VkBufferCreateInfo(
     sType: VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-    flags: toBits(flags),
+    flags: VkBufferCreateFlags(0),
     size: size,
     usage: toBits(usage),
     sharingMode: VK_SHARING_MODE_EXCLUSIVE,
@@ -71,7 +73,7 @@
     pAllocator=nil,
     pBuffer=addr result.vk
   )
-  result.allocateMemory(memoryFlags)
+  result.allocateMemory(useVRAM, mappable, autoFlush)
 
 
 proc copy*(src, dst: Buffer) =
@@ -114,8 +116,27 @@
 proc destroy*(buffer: var Buffer) =
   assert buffer.device.vk.valid
   assert buffer.vk.valid
-  if buffer.hasMemory:
+  if buffer.memoryAllocated:
     assert buffer.memory.vk.valid
     buffer.memory.free
   buffer.device.vk.vkDestroyBuffer(buffer.vk, nil)
-  buffer = default(Buffer)
+  buffer.vk.reset
+
+proc setData*(dst: Buffer, src: pointer, size: uint64, bufferOffset=0'u64) = # copies `size` bytes from `src` into `dst`, starting at `bufferOffset`
+  assert bufferOffset + size <= dst.size
+  if dst.memory.canMap: # host-visible memory: write straight through the mapped pointer
+    copyMem(cast[pointer](cast[uint64](dst.memory.data) + bufferOffset), src, size)
+    if dst.memory.needsFlushing: # memory type is not host-coherent
+      dst.memory.flush()
+  else: # use staging buffer, slower but required if memory is not host visible
+    var stagingBuffer = dst.device.createBuffer(size, [VK_BUFFER_USAGE_TRANSFER_SRC_BIT], useVRAM=false, mappable=true, autoFlush=true)
+    stagingBuffer.setData(src, size, 0)
+    stagingBuffer.copy(dst) # NOTE(review): bufferOffset is not passed on here - confirm copy() writes to the intended offset
+    stagingBuffer.destroy()
+
+proc setData*[T: seq](dst: Buffer, src: ptr T, offset=0'u64) = # uploads all elements of `src[]` at `offset`; does nothing for an empty seq
+  if src[].len > 0: dst.setData(pointer(addr(src[][0])), uint64(sizeof(get(genericParams(T), 0)) * src[].len), bufferOffset=offset) # element data, not the seq header
+
+proc setData*[T](dst: Buffer, src: ptr T, offset=0'u64) = # uploads the single value `src[]` at `offset`
+  dst.setData(pointer(src), uint64(sizeof(T)), bufferOffset=offset) # the untyped overload names this parameter `bufferOffset`
+
--- a/src/semicongine/vulkan/memory.nim	Wed Apr 05 00:45:16 2023 +0700
+++ b/src/semicongine/vulkan/memory.nim	Thu Apr 06 00:30:48 2023 +0700
@@ -1,4 +1,5 @@
 import std/strformat
+import std/algorithm
 
 import ./api
 import ./device
@@ -19,6 +20,11 @@
     device*: Device
     vk*: VkDeviceMemory
     size*: uint64
+    memoryType*: MemoryType
+    case canMap*: bool
+      of false: discard
+      of true: data*: pointer
+    needsFlushing*: bool
 
 proc getPhysicalDeviceMemoryProperties(physicalDevice: VkPhysicalDevice): PhyscialDeviceMemoryProperties =
   var physicalProperties: VkPhysicalDeviceMemoryProperties
@@ -38,13 +44,14 @@
 
 proc allocate*(device: Device, size: uint64, flags: openArray[VkMemoryPropertyFlagBits]): DeviceMemory =
   assert device.vk.valid
+  assert size > 0
 
   result.device = device
   result.size = size
 
   var
-    memtype: MemoryType
     hasAllFlags: bool
+    matchingTypes: seq[MemoryType]
   for mtype in device.physicalDevice.vk.getPhysicalDeviceMemoryProperties.types:
     hasAllFlags = true
     for flag in flags:
@@ -52,15 +59,19 @@
         hasAllFlags = false
         break
     if hasAllFlags:
-      memtype = mtype
-      break
-  if not hasAllFlags:
+      matchingTypes.add mtype
+  if matchingTypes.len == 0:
     raise newException(Exception, &"No memory with support for {flags}")
+  matchingTypes.sort(cmp= proc(a, b: MemoryType): int = cmp(a.heap.size, b.heap.size))
+
+  result.memoryType = matchingTypes[^1]
+  result.canMap = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT in result.memoryType.flags
+  result.needsFlushing = not (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in result.memoryType.flags)
 
   var allocationInfo = VkMemoryAllocateInfo(
     sType: VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
     allocationSize: size,
-    memoryTypeIndex: memtype.index,
+    memoryTypeIndex: result.memoryType.index,
   )
 
   checkVkResult vkAllocateMemory(
@@ -70,25 +81,62 @@
     addr result.vk
   )
 
-proc map*(memory: DeviceMemory, offset=0'u64, size=0'u64): pointer =
+  if result.canMap:
+    checkVkResult result.device.vk.vkMapMemory(
+      memory=result.vk,
+      offset=VkDeviceSize(0),
+      size=VkDeviceSize(result.size),
+      flags=VkMemoryMapFlags(0), # unused up to Vulkan 1.3
+      ppData=addr(result.data)
+    )
+
+proc allocate*(device: Device, size: uint64, useVRAM: bool, mappable: bool, autoFlush: bool): DeviceMemory = # translates the bool switches into memory property flags and allocates
+  var flags: seq[VkMemoryPropertyFlagBits]
+  if useVRAM:
+    flags.add VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+  if mappable:
+    flags.add VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT # the flag-based allocate maps host-visible memory right away
+  if autoFlush:
+    flags.add VK_MEMORY_PROPERTY_HOST_COHERENT_BIT # coherent memory: `needsFlushing` ends up false
+  device.allocate(size=size, flags=flags)
+
+# flush host -> device: hands host writes to non-coherent mapped memory over to the device
+proc flush*(memory: DeviceMemory, offset=0'u64, size=0'u64) =
   assert memory.device.vk.valid
   assert memory.vk.valid
-  
-  var thesize = size
-  if thesize == 0:
-    thesize = memory.size
+  assert memory.needsFlushing
+
+  var actualSize = size
+  if actualSize == 0: # size 0 means "everything from `offset` to the end"
+    actualSize = memory.size - offset
+  var flushrange = VkMappedMemoryRange(
+    sType: VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+    memory: memory.vk,
+    offset: VkDeviceSize(offset),
+    size: VkDeviceSize(actualSize) # the resolved size, never the raw 0 default
+  )
+  checkVkResult memory.device.vk.vkFlushMappedMemoryRanges(memoryRangeCount=1, pMemoryRanges=addr(flushrange))
 
-  checkVkResult memory.device.vk.vkMapMemory(
-    memory=memory.vk,
-    offset=VkDeviceSize(offset),
-    size=VkDeviceSize(thesize),
-    flags=VkMemoryMapFlags(0), # unused up to Vulkan 1.3
-    ppData=addr(result)
+# flush device -> host: makes device writes to non-coherent mapped memory visible to the host
+proc invalidate*(memory: DeviceMemory, offset=0'u64, size=0'u64) =
+  assert memory.device.vk.valid
+  assert memory.vk.valid
+  assert memory.needsFlushing
+
+  var actualSize = size
+  if actualSize == 0: # size 0 means "everything from `offset` to the end"
+    actualSize = memory.size - offset
+  var flushrange = VkMappedMemoryRange(
+    sType: VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+    memory: memory.vk,
+    offset: VkDeviceSize(offset),
+    size: VkDeviceSize(actualSize) # the resolved size, never the raw 0 default
   )
+  checkVkResult memory.device.vk.vkInvalidateMappedMemoryRanges(memoryRangeCount=1, pMemoryRanges=addr(flushrange))
 
 proc free*(memory: var DeviceMemory) =
   assert memory.device.vk.valid
   assert memory.vk.valid
 
   memory.device.vk.vkFreeMemory(memory.vk, nil)
-  memory = default(DeviceMemory)
+  memory.vk.reset
--- a/src/semicongine/vulkan/swapchain.nim	Wed Apr 05 00:45:16 2023 +0700
+++ b/src/semicongine/vulkan/swapchain.nim	Thu Apr 06 00:30:48 2023 +0700
@@ -170,7 +170,7 @@
       pOffsets=offsets.toCPointer()
     )
     if drawable.indexed:
-      commandBuffer.vkCmdBindIndexBuffer(drawable.indexBuffer.vk, VkDeviceSize(0), drawable.indexType)
+      commandBuffer.vkCmdBindIndexBuffer(drawable.indexBuffer.vk, VkDeviceSize(drawable.indexOffset), drawable.indexType)
       commandBuffer.vkCmdDrawIndexed(
         indexCount=drawable.elementCount,
         instanceCount=drawable.instanceCount,
--- a/tests/test_vulkan_wrapper.nim	Wed Apr 05 00:45:16 2023 +0700
+++ b/tests/test_vulkan_wrapper.nim	Thu Apr 06 00:30:48 2023 +0700
@@ -49,7 +49,7 @@
   var device = instance.createDevice(
     selectedPhysicalDevice,
     @[],
-    @[],
+    @["VK_EXT_index_type_uint8"],
     selectedPhysicalDevice.filterForGraphicsPresentationQueues()
   )
 
@@ -90,13 +90,39 @@
         positions=[newVec3f(0.0, -0.5), newVec3f(0.5, 0.5), newVec3f(-0.5, 0.5)],
         colors=[newVec3f(1.0, 0.0, 0.0), newVec3f(0.0, 1.0, 0.0), newVec3f(0.0, 0.0, 1.0)],
       )),
+      newEntity("triangle1b", newMesh(
+        positions=[newVec3f(0.0, -0.4), newVec3f(0.4, 0.4), newVec3f(-0.4, 0.5)],
+        colors=[newVec3f(1.0, 0.0, 0.0), newVec3f(0.0, 1.0, 0.0), newVec3f(0.0, 0.0, 1.0)],
+      )),
+      newEntity("triangle2a", newMesh(
+        positions=[newVec3f(0.0, 0.5), newVec3f(0.5, -0.5), newVec3f(-0.5, -0.5)],
+        colors=[newVec3f(1.0, 0.0, 0.0), newVec3f(0.0, 1.0, 0.0), newVec3f(0.0, 0.0, 1.0)],
+        indices=[[0'u16, 2'u16, 1'u16]]
+      )),
+      newEntity("triangle2b", newMesh(
+        positions=[newVec3f(0.0, 0.4), newVec3f(0.4, -0.4), newVec3f(-0.4, -0.4)],
+        colors=[newVec3f(1.0, 0.0, 0.0), newVec3f(0.0, 1.0, 0.0), newVec3f(0.0, 0.0, 1.0)],
+        indices=[[0'u16, 2'u16, 1'u16]]
+      )),
+      newEntity("triangle3a", newMesh(
+        positions=[newVec3f(0.4, 0.5), newVec3f(0.9, -0.3), newVec3f(0.0, -0.3)],
+        colors=[newVec3f(1.0, 1.0, 0.0), newVec3f(1.0, 1.0, 0.0), newVec3f(1.0, 1.0, 0.0)],
+        indices=[[0'u32, 2'u32, 1'u32]],
+        autoResize=false
+      )),
+      newEntity("triangle3b", newMesh(
+        positions=[newVec3f(0.4, 0.5), newVec3f(0.9, -0.3), newVec3f(0.0, -0.3)],
+        colors=[newVec3f(1.0, 1.0, 0.0), newVec3f(1.0, 1.0, 0.0), newVec3f(1.0, 1.0, 0.0)],
+        indices=[[0'u32, 2'u32, 1'u32]],
+        autoResize=false
+      )),
     )
   )
   thescene.setupDrawables(renderPass)
 
   # MAINLOOP
   echo "Setup successfull, start rendering"
-  for i in 0 ..< 1000:
+  for i in 0 ..< 10000:
     discard swapchain.drawScene(thescene)
   echo "Rendered ", swapchain.framesRendered, " frames"
   checkVkResult device.vk.vkDeviceWaitIdle()