diff --git a/intern/guardedalloc/MEM_guardedalloc.h b/intern/guardedalloc/MEM_guardedalloc.h index 8cd2c9f94dd..64fc1782ca9 100644 --- a/intern/guardedalloc/MEM_guardedalloc.h +++ b/intern/guardedalloc/MEM_guardedalloc.h @@ -131,6 +131,24 @@ extern void *(*MEM_mallocN_aligned)(size_t len, const char *str) /* ATTR_MALLOC */ ATTR_WARN_UNUSED_RESULT ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3); +extern void *(*MEM_direct_mallocN)(size_t len, + size_t alignment, + const char *str) /* ATTR_MALLOC */ ATTR_WARN_UNUSED_RESULT; + +extern void *(*MEM_direct_callocN)(size_t len, + size_t alignment, + const char *str) /* ATTR_MALLOC */ ATTR_WARN_UNUSED_RESULT; + +extern void *(*MEM_direct_reallocN)( + void *ptr, + size_t new_len, + size_t new_alignment, + const char *str, + size_t old_len, + size_t old_alignment) /* ATTR_MALLOC */ ATTR_WARN_UNUSED_RESULT; + +extern void (*MEM_direct_freeN)(void *ptr, size_t len, size_t alignment); + /** * Print a list of the names and sizes of all allocated memory * blocks. as a python dict for easy investigation. 
diff --git a/intern/guardedalloc/intern/mallocn.c b/intern/guardedalloc/intern/mallocn.c index f7979168799..3e15fae1a87 100644 --- a/intern/guardedalloc/intern/mallocn.c +++ b/intern/guardedalloc/intern/mallocn.c @@ -35,6 +35,19 @@ void *(*MEM_malloc_arrayN)(size_t len, size_t size, const char *str) = MEM_lockf void *(*MEM_mallocN_aligned)(size_t len, size_t alignment, const char *str) = MEM_lockfree_mallocN_aligned; +void *(*MEM_direct_mallocN)(size_t len, + size_t alignment, + const char *str) = MEM_lockfree_direct_mallocN; +void *(*MEM_direct_callocN)(size_t len, + size_t alignment, + const char *str) = MEM_lockfree_direct_callocN; +void *(*MEM_direct_reallocN)(void *ptr, + size_t new_len, + size_t new_alignment, + const char *str, + size_t old_len, + size_t old_alignment) = MEM_lockfree_direct_reallocN; +void (*MEM_direct_freeN)(void *ptr, size_t len, size_t alignment) = MEM_lockfree_direct_freeN; void (*MEM_printmemlist_pydict)(void) = MEM_lockfree_printmemlist_pydict; void (*MEM_printmemlist)(void) = MEM_lockfree_printmemlist; void (*MEM_callbackmemlist)(void (*func)(void *)) = MEM_lockfree_callbackmemlist; @@ -145,6 +158,10 @@ void MEM_use_guarded_allocator(void) MEM_mallocN = MEM_guarded_mallocN; MEM_malloc_arrayN = MEM_guarded_malloc_arrayN; MEM_mallocN_aligned = MEM_guarded_mallocN_aligned; + MEM_direct_mallocN = MEM_guarded_direct_mallocN; + MEM_direct_callocN = MEM_guarded_direct_callocN; + MEM_direct_reallocN = MEM_guarded_direct_reallocN; + MEM_direct_freeN = MEM_guarded_direct_freeN; MEM_printmemlist_pydict = MEM_guarded_printmemlist_pydict; MEM_printmemlist = MEM_guarded_printmemlist; MEM_callbackmemlist = MEM_guarded_callbackmemlist; diff --git a/intern/guardedalloc/intern/mallocn_guarded_impl.c b/intern/guardedalloc/intern/mallocn_guarded_impl.c index 8bf1680e6f8..10e81cfc2a6 100644 --- a/intern/guardedalloc/intern/mallocn_guarded_impl.c +++ b/intern/guardedalloc/intern/mallocn_guarded_impl.c @@ -230,6 +230,7 @@ size_t MEM_guarded_allocN_len(const 
void *vmemh) { if (vmemh) { const MemHead *memh = vmemh; + /* TODO: Add check that memhead is valid. */ memh--; return memh->len; @@ -572,6 +573,37 @@ void *MEM_guarded_calloc_arrayN(size_t len, size_t size, const char *str) return MEM_guarded_callocN(total_size, str); } +void *MEM_guarded_direct_mallocN(const size_t len, const size_t alignment, const char *str) +{ + return MEM_guarded_mallocN_aligned(len, alignment, str); +} + +void *MEM_guarded_direct_callocN(const size_t len, const size_t alignment, const char *str) +{ + void *ptr = MEM_guarded_mallocN_aligned(len, alignment, str); + memset(ptr, 0, len); + return ptr; +} + +void *MEM_guarded_direct_reallocN(void *ptr, + const size_t new_len, + const size_t new_alignment, + const char *str, + const size_t old_len, + const size_t UNUSED(old_alignment)) +{ + void *new_ptr = MEM_guarded_mallocN_aligned(new_len, new_alignment, str); + const size_t bytes_to_copy = new_len < old_len ? new_len : old_len; + memcpy(new_ptr, ptr, bytes_to_copy); + MEM_guarded_freeN(ptr); + return new_ptr; +} + +void MEM_guarded_direct_freeN(void *ptr, const size_t UNUSED(len), const size_t UNUSED(alignment)) +{ + MEM_freeN(ptr); +} + /* Memory statistics print */ typedef struct MemPrintBlock { const char *name; diff --git a/intern/guardedalloc/intern/mallocn_intern.h b/intern/guardedalloc/intern/mallocn_intern.h index f8b16ff6ddf..78c13ebfe3d 100644 --- a/intern/guardedalloc/intern/mallocn_intern.h +++ b/intern/guardedalloc/intern/mallocn_intern.h @@ -118,6 +118,22 @@ void *MEM_lockfree_mallocN_aligned(size_t len, size_t alignment, const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3); +void *MEM_lockfree_direct_mallocN(size_t len, + size_t alignment, + const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT + ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3); +void *MEM_lockfree_direct_callocN(size_t len, + size_t alignment, + const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT + ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3); +void 
*MEM_lockfree_direct_reallocN(void *ptr, + size_t new_len, + size_t new_alignment, + const char *str, + size_t old_len, + size_t old_alignment) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT + ATTR_ALLOC_SIZE(2) ATTR_NONNULL(4); +void MEM_lockfree_direct_freeN(void *ptr, size_t len, size_t alignment); void MEM_lockfree_printmemlist_pydict(void); void MEM_lockfree_printmemlist(void); void MEM_lockfree_callbackmemlist(void (*func)(void *)); @@ -161,6 +177,22 @@ void *MEM_guarded_mallocN_aligned(size_t len, size_t alignment, const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3); +void *MEM_guarded_direct_mallocN(size_t len, + size_t alignment, + const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT + ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3); +void *MEM_guarded_direct_callocN(size_t len, + size_t alignment, + const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT + ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3); +void *MEM_guarded_direct_reallocN(void *ptr, + size_t new_len, + size_t new_alignment, + const char *str, + size_t old_len, + size_t old_alignment) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT + ATTR_ALLOC_SIZE(2) ATTR_NONNULL(4); +void MEM_guarded_direct_freeN(void *ptr, size_t len, size_t alignment); void MEM_guarded_printmemlist_pydict(void); void MEM_guarded_printmemlist(void); void MEM_guarded_callbackmemlist(void (*func)(void *)); diff --git a/intern/guardedalloc/intern/mallocn_lockfree_impl.c b/intern/guardedalloc/intern/mallocn_lockfree_impl.c index 73912ad07b1..f6deb975ba3 100644 --- a/intern/guardedalloc/intern/mallocn_lockfree_impl.c +++ b/intern/guardedalloc/intern/mallocn_lockfree_impl.c @@ -352,6 +352,66 @@ void *MEM_lockfree_mallocN_aligned(size_t len, size_t alignment, const char *str return NULL; } +void *MEM_lockfree_direct_mallocN(const size_t len, + const size_t alignment, + const char *UNUSED(str)) +{ + if (alignment <= ALIGNED_MALLOC_MINIMUM_ALIGNMENT) { + return malloc(len); + } + return aligned_malloc(len, alignment); +} + +void 
*MEM_lockfree_direct_callocN(const size_t len, + const size_t alignment, + const char *UNUSED(str)) +{ + if (alignment <= ALIGNED_MALLOC_MINIMUM_ALIGNMENT) { + return calloc(1, len); + } + void *ptr = aligned_malloc(len, alignment); + memset(ptr, 0, len); + return ptr; +} + +void *MEM_lockfree_direct_reallocN(void *ptr, + const size_t new_len, + const size_t new_alignment, + const char *str, + const size_t old_len, + const size_t old_alignment) +{ + const bool new_alignment_is_small = new_alignment <= ALIGNED_MALLOC_MINIMUM_ALIGNMENT; + const bool old_alignment_is_small = old_alignment <= ALIGNED_MALLOC_MINIMUM_ALIGNMENT; + if (new_alignment_is_small && old_alignment_is_small) { + return realloc(ptr, new_len); + } + void *new_ptr = MEM_lockfree_direct_mallocN(new_len, new_alignment, str); + if (ptr == NULL) { + assert(old_len == 0); + return new_ptr; + } + const size_t bytes_to_copy = new_len < old_len ? new_len : old_len; + memcpy(new_ptr, ptr, bytes_to_copy); + if (old_alignment_is_small) { + free(ptr); + } + else { + aligned_free(ptr); + } + return new_ptr; +} + +void MEM_lockfree_direct_freeN(void *ptr, const size_t UNUSED(len), const size_t alignment) +{ + if (alignment <= ALIGNED_MALLOC_MINIMUM_ALIGNMENT) { + free(ptr); + } + else { + aligned_free(ptr); + } +} + void MEM_lockfree_printmemlist_pydict(void) { } diff --git a/source/blender/blenlib/BLI_allocator.hh b/source/blender/blenlib/BLI_allocator.hh index f19292fffd8..f8b6d14e788 100644 --- a/source/blender/blenlib/BLI_allocator.hh +++ b/source/blender/blenlib/BLI_allocator.hh @@ -24,6 +24,7 @@ */ #include +#include #include #include "MEM_guardedalloc.h" @@ -33,11 +34,76 @@ namespace blender { +class GuardedDirectAllocator { + public: + void *direct_allocate(const size_t size, const size_t alignment, const char *name) + { + return MEM_direct_mallocN(size, alignment, name); + } + + void *direct_allocate_zero(const size_t size, const size_t alignment, const char *name) + { + return MEM_direct_callocN(size, 
alignment, name);
+  }
+
+  void *direct_reallocate(void *ptr,
+                          const size_t new_size,
+                          const size_t new_alignment,
+                          const char *name,
+                          const size_t old_size,
+                          const size_t old_alignment)
+  {
+    return MEM_direct_reallocN(ptr, new_size, new_alignment, name, old_size, old_alignment);
+  }
+
+  void direct_deallocate(void *ptr, const size_t size, const size_t alignment)
+  {
+    MEM_direct_freeN(ptr, size, alignment);
+  }
+};
+
+namespace allocator_detail {
+template<typename Allocator> class DirectAllocatorInterfaceFromSimple {
+ public:
+  void *direct_allocate(const size_t size, const size_t alignment, const char *name)
+  {
+    return static_cast<Allocator *>(this)->allocate(size, alignment, name);
+  }
+
+  void *direct_allocate_zero(const size_t size, const size_t alignment, const char *name)
+  {
+    void *ptr = static_cast<Allocator *>(this)->allocate(size, alignment, name);
+    memset(ptr, 0, size);
+    return ptr;
+  }
+
+  void *direct_reallocate(void *ptr,
+                          const size_t new_size,
+                          const size_t new_alignment,
+                          const char *name,
+                          const size_t old_size,
+                          const size_t old_alignment)
+  {
+    void *new_ptr = static_cast<Allocator *>(this)->allocate(new_size, new_alignment, name);
+    const size_t bytes_to_copy = std::min(old_size, new_size);
+    memcpy(new_ptr, ptr, bytes_to_copy);
+    static_cast<Allocator *>(this)->deallocate(ptr);
+    return new_ptr;
+  }
+
+  void direct_deallocate(void *ptr, const size_t UNUSED(size), const size_t UNUSED(alignment))
+  {
+    static_cast<Allocator *>(this)->deallocate(ptr);
+  }
+};
+}  // namespace allocator_detail
+
 /**
  * Use Blender's guarded allocator (aka MEM_*). This should always be used except there is a
  * good reason not to use it.
  */
-class GuardedAllocator {
+class GuardedAllocator
+    : public allocator_detail::DirectAllocatorInterfaceFromSimple<GuardedAllocator> {
  public:
   void *allocate(size_t size, size_t alignment, const char *name)
   {
@@ -56,7 +122,7 @@ class GuardedAllocator {
  * used. This can be the case when the allocated memory might live longer than Blender's
  * allocator. For example, when the memory is owned by a static variable.
 */
-class RawAllocator {
+class RawAllocator : public allocator_detail::DirectAllocatorInterfaceFromSimple<RawAllocator> {
  private:
   struct MemHead {
     int offset;
diff --git a/source/blender/blenlib/BLI_memory_utils.hh b/source/blender/blenlib/BLI_memory_utils.hh
index 940542c9f1d..5678b8a4a25 100644
--- a/source/blender/blenlib/BLI_memory_utils.hh
+++ b/source/blender/blenlib/BLI_memory_utils.hh
@@ -495,6 +495,21 @@ inline constexpr bool is_span_convertible_pointer_v =
 template<typename T, typename... Args>
 inline constexpr bool is_same_any_v = (std::is_same_v<T, Args> || ...);
 
+template<typename T>
+inline constexpr bool is_trivially_relocatable_v =
+    std::is_trivially_copyable_v<T> && std::is_trivially_destructible_v<T>;
+
+template<typename T> inline bool can_zero_initialize_on_fill(const T &value)
+{
+  if constexpr (std::is_trivially_copyable_v<T>) {
+    const std::byte *value_ptr = static_cast<const std::byte *>(&value);
+    /* TODO */
+  }
+  else {
+    return false;
+  }
+}
+
 /**
  * Inline buffers for small-object-optimization should be disable by default. Otherwise we might
  * get large unexpected allocations on the stack.
diff --git a/source/blender/blenlib/BLI_vector.hh b/source/blender/blenlib/BLI_vector.hh
index c23d846d277..d88b5e4f562 100644
--- a/source/blender/blenlib/BLI_vector.hh
+++ b/source/blender/blenlib/BLI_vector.hh
@@ -60,7 +60,7 @@ template<
      * The allocator used by this vector. Should rarely be changed, except when you don't want that
      * MEM_* is used internally. */
-    typename Allocator = GuardedAllocator>
+    typename Allocator = GuardedDirectAllocator>
 class Vector {
  public:
   using value_type = T;
@@ -84,7 +84,7 @@ class Vector {
   T *capacity_end_;
 
   /** Used for allocations when the inline buffer is too small. */
-  BLI_NO_UNIQUE_ADDRESS Allocator allocator_;
+  BLI_NO_UNIQUE_ADDRESS GuardedDirectAllocator allocator_;
 
   /** A placeholder buffer that will remain uninitialized until it is used. */
   BLI_NO_UNIQUE_ADDRESS TypedBuffer<T, InlineBufferCapacity> inline_buffer_;
@@ -244,7 +244,7 @@ class Vector {
     /* Copy from inline buffer to newly allocated buffer.
*/ const int64_t capacity = size; begin_ = static_cast( - allocator_.allocate(sizeof(T) * static_cast(capacity), alignof(T), AT)); + allocator_.direct_allocate(sizeof(T) * static_cast(capacity), alignof(T), AT)); capacity_end_ = begin_ + capacity; uninitialized_relocate_n(other.begin_, size, begin_); end_ = begin_ + size; @@ -268,7 +268,7 @@ class Vector { { destruct_n(begin_, this->size()); if (!this->is_inline()) { - allocator_.deallocate(begin_); + allocator_.direct_deallocate(begin_, this->capacity(), alignof(T)); } } @@ -337,10 +337,10 @@ class Vector { * This won't necessarily make an allocation when min_capacity is small. * The actual size of the vector does not change. */ - void reserve(const int64_t min_capacity) + void reserve(const int64_t min_capacity, const bool zero_new_capacity = false) { if (min_capacity > this->capacity()) { - this->realloc_to_at_least(min_capacity); + this->realloc_to_at_least(min_capacity, zero_new_capacity); } } @@ -415,7 +415,7 @@ class Vector { { destruct_n(begin_, this->size()); if (!this->is_inline()) { - allocator_.deallocate(begin_); + allocator_.direct_deallocate(begin_, this->capacity(), alignof(T)); } begin_ = inline_buffer_; @@ -949,38 +949,63 @@ class Vector { void ensure_space_for_one() { if (UNLIKELY(end_ >= capacity_end_)) { - this->realloc_to_at_least(this->size() + 1); + this->realloc_to_at_least(this->size() + 1, false); } } - BLI_NOINLINE void realloc_to_at_least(const int64_t min_capacity) - { - if (this->capacity() >= min_capacity) { - return; - } - - /* At least double the size of the previous allocation. Otherwise consecutive calls to grow can - * cause a reallocation every time even though min_capacity only increments. 
 */
-    const int64_t min_new_capacity = this->capacity() * 2;
-
-    const int64_t new_capacity = std::max(min_capacity, min_new_capacity);
-    const int64_t size = this->size();
-
-    T *new_array = static_cast<T *>(
-        allocator_.allocate(static_cast<size_t>(new_capacity) * sizeof(T), alignof(T), AT));
-    try {
-      uninitialized_relocate_n(begin_, size, new_array);
+  BLI_NOINLINE void realloc_to_at_least(const int64_t min_capacity_, const bool zero_new_capacity)
+  {
+    const size_t min_capacity = static_cast<size_t>(min_capacity_);
+    const size_t old_capacity = static_cast<size_t>(this->capacity());
+    BLI_assert(min_capacity > old_capacity);
+
+    /* At least increase the last allocation by a factor that is greater than 1. Otherwise
+     * consecutive calls to #reserve can cause a reallocation every time even though min_capacity
+     * only increases linearly. */
+    const size_t min_new_capacity = std::max<size_t>(old_capacity * 3 / 2, 4);
+    const size_t new_capacity = std::max(min_capacity, min_new_capacity);
+    const size_t size = this->size();
+    const size_t old_capacity_in_bytes = old_capacity * sizeof(T);
+    const size_t new_capacity_in_bytes = new_capacity * sizeof(T);
+    const bool was_inline = this->is_inline();
+    const bool was_allocated = !was_inline;
+
+    bool zero_new_capacity_manually = false;
+    if (is_trivially_relocatable_v<T> && was_allocated) {
+      begin_ = static_cast<T *>(allocator_.direct_reallocate(
+          begin_, new_capacity_in_bytes, alignof(T), __func__, old_capacity_in_bytes, alignof(T)));
+      zero_new_capacity_manually = zero_new_capacity;
     }
-    catch (...) {
-      allocator_.deallocate(new_array);
-      throw;
+    else {
+      T *new_array;
+      if (zero_new_capacity && was_inline) {
+        new_array = static_cast<T *>(
+            allocator_.direct_allocate_zero(new_capacity_in_bytes, alignof(T), __func__));
+      }
+      else {
+        new_array = static_cast<T *>(
+            allocator_.direct_allocate(new_capacity_in_bytes, alignof(T), __func__));
+        zero_new_capacity_manually = zero_new_capacity;
+      }
+      try {
+        uninitialized_relocate_n(begin_, size, new_array);
+      }
+      catch (...)
{ + allocator_.direct_deallocate(new_array, new_capacity_in_bytes, alignof(T)); + throw; + } + if (was_allocated) { + allocator_.direct_deallocate(begin_, old_capacity_in_bytes, alignof(T)); + } + begin_ = new_array; } - if (!this->is_inline()) { - allocator_.deallocate(begin_); + if (zero_new_capacity_manually) { + memset(static_cast(begin_ + old_capacity), + 0, + new_capacity_in_bytes - old_capacity_in_bytes); } - begin_ = new_array; end_ = begin_ + size; capacity_end_ = begin_ + new_capacity; }