#include "GrVkGpu.h"
#include "GrVkUtil.h"
+#ifdef SK_DEBUG
+// debug-only running totals of the memory we have allocated from each heap, indexed by heap index
+// the extra last slot (index VK_MAX_MEMORY_HEAPS) counts allocations made directly, not via a subheap
+VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 };
+#endif
+
static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps,
uint32_t typeBits,
VkMemoryPropertyFlags requestedMemFlags,
- uint32_t* typeIndex) {
+ uint32_t* typeIndex,
+ uint32_t* heapIndex) {
for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) {
if (typeBits & (1 << i)) {
uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags &
requestedMemFlags;
if (supportedFlags == requestedMemFlags) {
*typeIndex = i;
+ *heapIndex = physDevMemProps.memoryTypes[i].heapIndex;
return true;
}
}
GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
uint32_t typeIndex = 0;
+ uint32_t heapIndex = 0;
const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
if (dynamic) {
// try to get cached and ideally non-coherent memory first
memReqs.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
- &typeIndex)) {
+ &typeIndex,
+ &heapIndex)) {
// some sort of host-visible memory type should always be available for dynamic buffers
SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
memReqs.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
- &typeIndex));
+ &typeIndex,
+ &heapIndex));
}
VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
memReqs.memoryTypeBits,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- &typeIndex));
+ &typeIndex,
+ &heapIndex));
alloc->fFlags = 0x0;
}
GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
- if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
- SkDebugf("Failed to alloc buffer\n");
- return false;
+ if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ // if static, retry with no required property flags (accepts the first type in memoryTypeBits)
+ if (dynamic ||
+ !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+ 0, &typeIndex, &heapIndex) ||
+ !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ SkDebugf("Failed to alloc buffer\n");
+ return false;
+ }
}
// Bind buffer
GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));
uint32_t typeIndex = 0;
+ uint32_t heapIndex = 0;
GrVkHeap* heap;
const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
if (linearTiling) {
if (!get_valid_memory_type_index(phDevMemProps,
memReqs.memoryTypeBits,
desiredMemProps,
- &typeIndex)) {
+ &typeIndex,
+ &heapIndex)) {
// some sort of host-visible memory type should always be available
SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
memReqs.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
- &typeIndex));
+ &typeIndex,
+ &heapIndex));
}
heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
memReqs.memoryTypeBits,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- &typeIndex));
+ &typeIndex,
+ &heapIndex));
if (memReqs.size <= kMaxSmallImageSize) {
heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
} else {
alloc->fFlags = 0x0;
}
- if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
- SkDebugf("Failed to alloc image\n");
- return false;
+ if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ // if optimal, retry with no required property flags (accepts the first type in memoryTypeBits)
+ if (linearTiling ||
+ !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+ 0, &typeIndex, &heapIndex) ||
+ !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ SkDebugf("Failed to alloc image\n");
+ return false;
+ }
}
// Bind image
#endif
}
-GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex,
+GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
VkDeviceSize size, VkDeviceSize alignment)
: INHERITED(size, alignment)
, fGpu(gpu)
- , fMemoryTypeIndex(memoryTypeIndex) {
+ , fMemoryTypeIndex(memoryTypeIndex)
+ , fHeapIndex(heapIndex) {
VkMemoryAllocateInfo allocInfo = {
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
&fAlloc));
if (VK_SUCCESS != err) {
this->reset();
+ }
+#ifdef SK_DEBUG
+ else {
+ gHeapUsage[heapIndex] += size;
}
+#endif
}
GrVkSubHeap::~GrVkSubHeap() {
const GrVkInterface* iface = fGpu->vkInterface();
GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
+#ifdef SK_DEBUG
+ gHeapUsage[fHeapIndex] -= fSize; // undo the ctor's accounting; NOTE(review): assumes fSize is 0 after a failed alloc - confirm
+#endif
}
bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
}
bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+ uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
VkDeviceSize alignedSize = align_size(size, alignment);
// if requested is larger than our subheap allocation, just alloc directly
}
alloc->fOffset = 0;
alloc->fSize = 0; // hint that this is not a subheap allocation
+#ifdef SK_DEBUG
+ gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize;
+#endif
return true;
}
// need to allocate a new subheap
SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
- subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, fSubHeapSize, alignment));
+ subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment));
// try to recover from failed allocation by only allocating what we need
if (subHeap->size() == 0) {
VkDeviceSize alignedSize = align_size(size, alignment);
- subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+ subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
if (subHeap->size() == 0) {
return false;
}
}
bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+ uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
VkDeviceSize alignedSize = align_size(size, alignment);
// first try to find an unallocated subheap that fits our allocation request
// need to allocate a new subheap
SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
- subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+ subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
fAllocSize += alignedSize;
if (subHeap->alloc(size, alloc)) {
fUsedSize += alloc->fSize;
class GrVkSubHeap : public GrVkFreeListAlloc {
public:
- GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex,
+ GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
VkDeviceSize size, VkDeviceSize alignment);
~GrVkSubHeap();
private:
const GrVkGpu* fGpu;
uint32_t fMemoryTypeIndex;
+ uint32_t fHeapIndex;
VkDeviceMemory fAlloc;
typedef GrVkFreeListAlloc INHERITED;
VkDeviceSize allocSize() const { return fAllocSize; }
VkDeviceSize usedSize() const { return fUsedSize; }
- bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex,
- GrVkAlloc* alloc) {
+ bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex,
+ uint32_t heapIndex, GrVkAlloc* alloc) {
SkASSERT(size > 0);
- return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, alloc);
+ return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc); // forward to whichever allocator fAllocFunc points at (presumably subAlloc or singleAlloc)
}
bool free(const GrVkAlloc& alloc);
private:
- typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+ typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment,
+ uint32_t memoryTypeIndex, uint32_t heapIndex,
+ GrVkAlloc* alloc);
- bool subAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+ bool subAlloc(VkDeviceSize size, VkDeviceSize alignment,
+ uint32_t memoryTypeIndex, uint32_t heapIndex,
+ GrVkAlloc* alloc);
bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+ uint32_t memoryTypeIndex, uint32_t heapIndex,
+ GrVkAlloc* alloc);
const GrVkGpu* fGpu;
VkDeviceSize fSubHeapSize;
void subheap_test(skiatest::Reporter* reporter, GrContext* context) {
GrVkGpu* gpu = static_cast<GrVkGpu*>(context->getGpu());
- // heap index doesn't matter, we're just testing the suballocation algorithm so we'll use 0
- GrVkSubHeap heap(gpu, 0, 64 * 1024, 32);
+ // memtype/heap index don't matter, we're just testing the suballocation algorithm so we'll use 0
+ GrVkSubHeap heap(gpu, 0, 0, 64 * 1024, 32);
GrVkAlloc alloc0, alloc1, alloc2, alloc3;
// test full allocation and free
REPORTER_ASSERT(reporter, heap.alloc(64 * 1024, &alloc0));
void suballoc_test(skiatest::Reporter* reporter, GrContext* context) {
GrVkGpu* gpu = static_cast<GrVkGpu*>(context->getGpu());
- // heap index doesn't matter, we're just testing the allocation algorithm so we'll use 0
+ // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0
GrVkHeap heap(gpu, GrVkHeap::kSubAlloc_Strategy, 64 * 1024);
GrVkAlloc alloc0, alloc1, alloc2, alloc3;
const VkDeviceSize kAlignment = 16;
+ const uint32_t kMemType = 0;
const uint32_t kHeapIndex = 0;
REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0);
// fragment allocations so we need to grow heap
- REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 3, kAlignment, kHeapIndex, &alloc0));
- REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 9, kAlignment, kHeapIndex, &alloc1));
- REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 15, kAlignment, kHeapIndex, &alloc2));
- REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 6, kAlignment, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 9, kAlignment, kMemType, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 15, kAlignment, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 6, kAlignment, kMemType, kHeapIndex, &alloc3));
REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 42 * 1024);
heap.free(alloc0);
REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 23 * 1024);
heap.free(alloc2);
REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 8 * 1024);
// we expect the heap to grow here
- REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 48 * 1024);
heap.free(alloc3);
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 45 * 1024);
// heap should not grow here (first subheap has exactly enough room)
- REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3));
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 85 * 1024);
// heap should not grow here (second subheap has room)
- REPORTER_ASSERT(reporter, heap.alloc(22 * 1024, kAlignment, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(22 * 1024, kAlignment, kMemType, kHeapIndex, &alloc2));
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 107 * 1024);
heap.free(alloc1);
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 102 * 1024);
heap.free(alloc3);
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
// heap should not grow here (allocating more than subheap size)
- REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
REPORTER_ASSERT(reporter, 0 == alloc0.fSize);
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 24 * 1024);
+ // heap should alloc a new subheap because the memory type is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType+1, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 192 * 1024 && heap.usedSize() == 48 * 1024);
+ // heap should alloc a new subheap because the alignment is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 72 * 1024);
+ heap.free(alloc2);
+ heap.free(alloc0);
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 0 * 1024);
}
void singlealloc_test(skiatest::Reporter* reporter, GrContext* context) {
GrVkGpu* gpu = static_cast<GrVkGpu*>(context->getGpu());
- // heap index doesn't matter, we're just testing the allocation algorithm so we'll use 0
+ // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0
GrVkHeap heap(gpu, GrVkHeap::kSingleAlloc_Strategy, 64 * 1024);
GrVkAlloc alloc0, alloc1, alloc2, alloc3;
const VkDeviceSize kAlignment = 64;
+ const uint32_t kMemType = 0;
const uint32_t kHeapIndex = 0;
REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0);
// make a few allocations
- REPORTER_ASSERT(reporter, heap.alloc(49 * 1024 - 3, kAlignment, kHeapIndex, &alloc0));
- REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 37, kAlignment, kHeapIndex, &alloc1));
- REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 11, kAlignment, kHeapIndex, &alloc2));
- REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 29, kAlignment, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.alloc(49 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 37, kAlignment, kMemType, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 11, kAlignment, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 29, kAlignment, kMemType, kHeapIndex, &alloc3));
REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 72 * 1024);
heap.free(alloc0);
REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 23 * 1024);
heap.free(alloc2);
REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 8 * 1024);
// heap should not grow here (first subheap has room)
- REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 48 * 1024);
heap.free(alloc3);
REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024);
// check for exact fit -- heap should not grow here (third subheap has room)
- REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 63, kAlignment, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 63, kAlignment, kMemType, kHeapIndex, &alloc2));
REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 60 * 1024);
heap.free(alloc2);
REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024);
// heap should grow here (no subheap has room)
- REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3));
REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 85 * 1024);
heap.free(alloc1);
REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 80 * 1024);
REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 40 * 1024);
heap.free(alloc3);
REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 0 * 1024);
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 24 * 1024);
+ // heap should alloc a new subheap because the memory type is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType + 1, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 136 * 1024 && heap.usedSize() == 48 * 1024);
+ // heap should alloc a new subheap because the alignment is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 72 * 1024);
+ heap.free(alloc1);
+ heap.free(alloc2);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 0 * 1024);
}
DEF_GPUTEST_FOR_VULKAN_CONTEXT(VkHeapTests, reporter, ctxInfo) {