diff --git a/include/runtime/arena.h b/include/runtime/arena.h
index fc72155fb..3f71445e4 100644
--- a/include/runtime/arena.h
+++ b/include/runtime/arena.h
@@ -1,38 +1,49 @@
 #ifndef ARENA_H
 #define ARENA_H
 
+#include <algorithm>
 #include <cstddef>
+#include <cstdint>
 #include <sys/types.h>
+#include <utility>
 
 #include "runtime/alloc.h"
 
 extern "C" {
 
+size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
+
 // An arena can be used to allocate objects that can then be deallocated all at
 // once.
 class arena {
 public:
   arena(char id)
-      : allocation_semispace_id(id) { }
+      : allocation_semispace_id(id) {
+    initialize_semispace();
+  }
 
   // Allocates the requested number of bytes as a contiguous region and returns a
   // pointer to the first allocated byte.
-  // If called with requested size greater than the maximun single allocation
-  // size, the space is allocated in a general (not garbage collected pool).
   void *kore_arena_alloc(size_t requested);
 
   // Returns the address of the first byte that belongs in the given arena.
   // Returns 0 if nothing has been allocated ever in that arena.
-  char *arena_start_ptr() const;
+  char *arena_start_ptr() const {
+    return current_addr_ptr ? current_addr_ptr + sizeof(memory_block_header)
+                            : nullptr;
+  }
 
   // Returns a pointer to a location holding the address of last allocated
   // byte in the given arena plus 1.
   // This address is 0 if nothing has been allocated ever in that arena.
-  char **arena_end_ptr();
+  char **arena_end_ptr() { return &allocation_ptr; }
 
   // return the total number of allocatable bytes currently in the arena in its
   // active semispace.
-  size_t arena_size() const;
+  size_t arena_size() const {
+    update_num_blocks();
+    return BLOCK_SIZE * std::max(num_blocks, num_collection_blocks);
+  }
 
   // Clears the current allocation space by setting its start back to its first
   // block. It is used during garbage collection to effectively collect all of the
@@ -41,15 +52,18 @@ class arena {
   // Resizes the last allocation as long as the resize does not require a new
   // block allocation.
-  // Returns the address of the byte following the last newlly allocated byte when
-  // the resize succeeds, returns 0 otherwise.
-  void *arena_resize_last_alloc(ssize_t increase);
+  // Returns the address of the byte following the last newly allocated byte.
+  void *arena_resize_last_alloc(ssize_t increase) {
+    return (allocation_ptr += increase);
+  }
 
   // Returns the given arena's current collection semispace ID.
   // Each arena has 2 semispace IDs one equal to the arena ID and the other equal
   // to the 1's complement of the arena ID. At any time one of these semispaces
   // is used for allocation and the other is used for collection.
-  char get_arena_collection_semispace_id() const;
+  char get_arena_collection_semispace_id() const {
+    return ~allocation_semispace_id;
+  }
 
   // Exchanges the current allocation and collection semispaces and clears the new
   // current allocation semispace by setting its start back to its first block.
@@ -61,7 +75,7 @@ class arena {
   // by the blocks of that arena. This difference will include blocks containing
   // sentinel bytes. Undefined behavior will result if the pointers belong to
   // different arenas.
-  static ssize_t ptr_diff(char *ptr1, char *ptr2);
+  static ssize_t ptr_diff(char *ptr1, char *ptr2) { return ptr1 - ptr2; }
 
   // Given a starting pointer to an address allocated in an arena and a size in
   // bytes, this function returns a pointer to an address allocated in the
@@ -72,7 +86,10 @@ class arena {
   // 3rd argument: the address of last allocated byte in the arena plus 1
   // Return value: the address allocated in the arena after size bytes from the
   // starting pointer, or 0 if this is equal to the 3rd argument.
-  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr);
+  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr) {
+    char *next_ptr = ptr + size;
+    return (next_ptr == arena_end_ptr) ? 0 : next_ptr;
+  }
 
   // Returns the ID of the semispace where the given address was allocated.
   // The behavior is undefined if called with an address that has not been
@@ -80,34 +97,59 @@ class arena {
   static char get_arena_semispace_id_of_object(void *ptr);
 
 private:
-  struct memory_block_header {
-    char *next_block;
+  union memory_block_header {
+    //
+    // Currently the header just holds the semispace id. But we need it to be a
+    // multiple of sizeof(char*) for alignment purposes so we add a dummy char*.
+    //
     char semispace;
+    char *alignment_dummy;
   };
 
-  void fresh_block();
-  static memory_block_header *mem_block_header(void *ptr);
+  //
+  // We update the number of 1MB blocks actually written to only when we need
+  // this value or before a garbage collection, rather than trying to determine
+  // when we write to a fresh block.
+  //
+  void update_num_blocks() const {
+    //
+    // Calculate how many 1M blocks of the current arena we used.
+    //
+    size_t num_used_blocks
+        = (allocation_ptr - current_addr_ptr - 1) / BLOCK_SIZE + 1;
+    if (num_used_blocks > num_blocks)
+      num_blocks = num_used_blocks;
+  }
+
+  void initialize_semispace();
 
-  // helper function for `kore_arena_alloc`. Do not call directly.
-  void *do_alloc_slow(size_t requested);
+  static memory_block_header *mem_block_header(void *ptr) {
+    uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
+    return reinterpret_cast<memory_block_header *>(
+        (address - 1) & ~(HYPERBLOCK_SIZE - 1));
+  }
 
-  char *first_block; // beginning of first block
-  char *block; // where allocations are being made in current block
-  char *block_start; // start of current block
-  char *block_end; // 1 past end of current block
-  char *first_collection_block; // beginning of other semispace
-  size_t num_blocks; // number of blocks in current semispace
-  size_t num_collection_blocks; // number of blocks in other semispace
+  //
+  // Current semispace where allocations are being made.
+  //
+  char *current_addr_ptr; // pointer to start of current address space
+  char *allocation_ptr; // next available location in current semispace
+  char *tripwire; // allocating past this requests a garbage collection
+  mutable size_t
+      num_blocks; // notional number of BLOCK_SIZE blocks in current semispace
   char allocation_semispace_id; // id of current semispace
+  //
+  // Semispace where allocations will be made during and after garbage collection.
+  //
+  char *collection_addr_ptr
+      = nullptr; // pointer to start of collection address space
+  size_t num_collection_blocks
+      = 0; // notional number of BLOCK_SIZE blocks in collection semispace
 };
 
 // Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
 // 127.
 #define REGISTER_ARENA(name, id) static thread_local arena name(id)
 
-#define MEM_BLOCK_START(ptr) \
-  ((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))
-
 #ifdef __MACH__
 //
 // thread_local disabled for Apple
 //
@@ -120,16 +162,51 @@ extern thread_local bool time_for_collection;
 
 size_t get_gc_threshold();
 
 inline void *arena::kore_arena_alloc(size_t requested) {
-  if (block + requested > block_end) {
-    return do_alloc_slow(requested);
+  if (allocation_ptr + requested >= tripwire) {
+    //
+    // We got close to or past the last location accessed in this address range
+    // so far, depending on the requested size and tripwire setting. This
+    // triggers a garbage collection when allowed.
+    //
+    time_for_collection = true;
+    tripwire = current_addr_ptr
+               + HYPERBLOCK_SIZE; // won't trigger again until arena swap
   }
-  void *result = block;
-  block += requested;
+  void *result = allocation_ptr;
+  allocation_ptr += requested;
   MEM_LOG(
-      "Allocation at %p (size %zd), next alloc at %p (if it fits)\n", result,
-      requested, block);
+      "Allocation at %p (size %zd), next alloc at %p\n", result, requested,
+      allocation_ptr);
   return result;
 }
+
+inline void arena::arena_clear() {
+  //
+  // We set the allocation pointer to the first available address.
+  //
+  allocation_ptr = arena_start_ptr();
+  //
+  // If the number of blocks we've touched is >= threshold, we want to trigger
+  // a garbage collection if we get within 1 block of the end of this area.
+  // Otherwise we only want to trigger a garbage collection if we allocate off
+  // the end of this area.
+  //
+  tripwire = current_addr_ptr
+             + (num_blocks - (num_blocks >= get_gc_threshold())) * BLOCK_SIZE;
 }
+inline void arena::arena_swap_and_clear() {
+  update_num_blocks(); // so we save the correct number of touched blocks
+  std::swap(current_addr_ptr, collection_addr_ptr);
+  std::swap(num_blocks, num_collection_blocks);
+  allocation_semispace_id = ~allocation_semispace_id;
+  if (current_addr_ptr == nullptr) {
+    //
+    // The other semispace hasn't been initialized yet.
+    //
+    initialize_semispace();
+  } else
+    arena_clear();
+}
+}
 #endif // ARENA_H
diff --git a/runtime/alloc/arena.cpp b/runtime/alloc/arena.cpp
index c4384642e..9fbde8bf9 100644
--- a/runtime/alloc/arena.cpp
+++ b/runtime/alloc/arena.cpp
@@ -1,4 +1,3 @@
-
 #include
 #include
 #include
@@ -12,226 +11,58 @@ extern size_t const VAR_BLOCK_SIZE = BLOCK_SIZE;
 
-__attribute__((always_inline)) arena::memory_block_header *
-arena::mem_block_header(void *ptr) {
-  // NOLINTNEXTLINE(*-reinterpret-cast)
-  return reinterpret_cast<arena::memory_block_header *>(
-      ((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1));
-}
-
-__attribute__((always_inline)) char
-arena::get_arena_collection_semispace_id() const {
-  return ~allocation_semispace_id;
-}
-
 __attribute__((always_inline)) char
 arena::get_arena_semispace_id_of_object(void *ptr) {
   return mem_block_header(ptr)->semispace;
 }
 
-//
-// We will reserve enough address space for 1 million 1MB blocks. Might want to increase this on a > 1TB server.
-//
-size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
-
-static void *megabyte_malloc() {
-  //
-  // Return pointer to a BLOCK_SIZE chunk of memory with BLOCK_SIZE alignment.
-  //
-  static thread_local char *currentblock_ptr
-      = nullptr; // char* rather than void* to permit pointer arithmetic
-  if (currentblock_ptr) {
-    //
-    // We expect an page fault due to not being able to map physical memory to this block or the
-    // process to be killed by the OOM killer long before we run off the end of our address space.
-    //
-    currentblock_ptr += BLOCK_SIZE;
-  } else {
-    //
-    // First call - need to reserve the address space.
-    //
-    size_t request = HYPERBLOCK_SIZE;
-    void *addr = mmap(
-        nullptr, // let OS choose the address
-        request, // Linux and MacOS both allow up to 64TB
-        PROT_READ | PROT_WRITE, // read, write but not execute
-        MAP_ANONYMOUS | MAP_PRIVATE
-            | MAP_NORESERVE, // allocate address space only
-        -1, // no file backing
-        0); // no offset
-    if (addr == MAP_FAILED) {
-      perror("mmap()");
-      abort();
-    }
-    //
-    // We ask for one block worth of address space less than we allocated so alignment will always succeed.
-    // We don't worry about unused address space either side of our aligned address space because there will be no
-    // memory mapped to it.
-    //
-    currentblock_ptr = reinterpret_cast<char *>(
-        std::align(BLOCK_SIZE, HYPERBLOCK_SIZE - BLOCK_SIZE, addr, request));
-  }
-  return currentblock_ptr;
-}
-
-#ifdef __MACH__
-//
-// thread_local disabled for Apple
-//
-bool time_for_collection;
-#else
-thread_local bool time_for_collection;
-#endif
-
-void arena::fresh_block() {
-  char *next_block = nullptr;
-  if (block_start == nullptr) {
-    next_block = (char *)megabyte_malloc();
-    first_block = next_block;
-    auto *next_header = (arena::memory_block_header *)next_block;
-    next_header->next_block = nullptr;
-    next_header->semispace = allocation_semispace_id;
-    num_blocks++;
-  } else {
-    next_block = *(char **)block_start;
-    if (block != block_end) {
-      if (block_end - block == 8) {
-        *(uint64_t *)block = NOT_YOUNG_OBJECT_BIT; // 8 bit sentinel value
-      } else {
-        *(uint64_t *)block
-            = block_end - block - 8; // 16-bit or more sentinel value
-      }
-    }
-    if (!next_block) {
-      MEM_LOG(
-          "Allocating new block for the first time in arena %d\n",
-          allocation_semispace_id);
-      next_block = (char *)megabyte_malloc();
-      *(char **)block_start = next_block;
-      auto *next_header = (arena::memory_block_header *)next_block;
-      next_header->next_block = nullptr;
-      next_header->semispace = allocation_semispace_id;
-      num_blocks++;
-      time_for_collection = true;
-    }
-  }
-  if (!*(char **)next_block && num_blocks >= get_gc_threshold()) {
-    time_for_collection = true;
-  }
-  block = next_block + sizeof(arena::memory_block_header);
-  block_start = next_block;
-  block_end = next_block + BLOCK_SIZE;
-  MEM_LOG(
-      "New block at %p (remaining %zd)\n", block,
-      BLOCK_SIZE - sizeof(arena::memory_block_header));
-}
-
 #ifdef __MACH__
 //
 // thread_local disabled for Apple
 //
+bool time_for_collection = false;
 bool gc_enabled = true;
 #else
+thread_local bool time_for_collection = false;
 thread_local bool gc_enabled = true;
 #endif
 
-__attribute__((noinline)) void *arena::do_alloc_slow(size_t requested) {
-  MEM_LOG(
-      "Block at %p too small, %zd remaining but %zd needed\n", block,
-      block_end - block, requested);
-  if (requested > BLOCK_SIZE - sizeof(arena::memory_block_header)) {
-    return malloc(requested);
-  }
-  fresh_block();
-  void *result = block;
-  block += requested;
-  MEM_LOG(
-      "Allocation at %p (size %zd), next alloc at %p (if it fits)\n", result,
-      requested, block);
-  return result;
-}
-
-__attribute__((always_inline)) void *
-arena::arena_resize_last_alloc(ssize_t increase) {
-  if (block + increase <= block_end) {
-    block += increase;
-    return block;
-  }
-  return nullptr;
-}
-
-__attribute__((always_inline)) void arena::arena_swap_and_clear() {
-  char *tmp = first_block;
-  first_block = first_collection_block;
-  first_collection_block = tmp;
-  size_t tmp2 = num_blocks;
-  num_blocks = num_collection_blocks;
-  num_collection_blocks = tmp2;
-  allocation_semispace_id = ~allocation_semispace_id;
-  arena_clear();
-}
-
-__attribute__((always_inline)) void arena::arena_clear() {
-  block = first_block ? first_block + sizeof(arena::memory_block_header)
-                      : nullptr;
-  block_start = first_block;
-  block_end = first_block ? first_block + BLOCK_SIZE : nullptr;
-}
-
-__attribute__((always_inline)) char *arena::arena_start_ptr() const {
-  return first_block ? first_block + sizeof(arena::memory_block_header)
-                     : nullptr;
-}
-
-__attribute__((always_inline)) char **arena::arena_end_ptr() {
-  return &block;
-}
-
-char *arena::move_ptr(char *ptr, size_t size, char const *arena_end_ptr) {
-  char *next_ptr = ptr + size;
-  if (next_ptr == arena_end_ptr) {
-    return nullptr;
-  }
-  if (next_ptr != MEM_BLOCK_START(ptr) + BLOCK_SIZE) {
-    return next_ptr;
-  }
-  char *next_block = *(char **)MEM_BLOCK_START(ptr);
-  if (!next_block) {
-    return nullptr;
-  }
-  return next_block + sizeof(arena::memory_block_header);
-}
-
-ssize_t arena::ptr_diff(char *ptr1, char *ptr2) {
-  if (MEM_BLOCK_START(ptr1) == MEM_BLOCK_START(ptr2)) {
-    return ptr1 - ptr2;
-  }
-  arena::memory_block_header *hdr = mem_block_header(ptr2);
-  ssize_t result = 0;
-  while (hdr != mem_block_header(ptr1) && hdr->next_block) {
-    if (ptr2) {
-      result += ((char *)hdr + BLOCK_SIZE) - ptr2;
-      ptr2 = nullptr;
-    } else {
-      result += (BLOCK_SIZE - sizeof(arena::memory_block_header));
-    }
-    hdr = (arena::memory_block_header *)hdr->next_block;
+void arena::initialize_semispace() {
+  //
+  // Current semispace is uninitialized so mmap() a big chunk of address space.
+  //
+  size_t request = 2 * HYPERBLOCK_SIZE;
+  void *addr = mmap(
+      nullptr, // let OS choose the address
+      request, // Linux and MacOS both allow up to 64TB
+      PROT_READ | PROT_WRITE, // read, write but not execute
+      MAP_ANONYMOUS | MAP_PRIVATE
+          | MAP_NORESERVE, // allocate address space only
+      -1, // no file backing
+      0); // no offset
+  if (addr == MAP_FAILED) {
+    perror("mmap()");
+    abort();
   }
-  if (hdr == mem_block_header(ptr1)) {
-    result += ptr1 - (char *)(hdr + 1);
-    return result;
-  } // reached the end of the arena and didn't find the block
-  // it's possible that the result should be negative, in which
-  // case the block will have been prior to the block we started
-  // at. To handle this, we recurse with reversed arguments and
-  // negate the result. This means that the code might not
-  // terminate if the two pointers do not belong to the same
-  // arena.
-  return -ptr_diff(ptr2, ptr1);
-}
-
-size_t arena::arena_size() const {
-  return (num_blocks > num_collection_blocks ? num_blocks
-                                             : num_collection_blocks)
-         * (BLOCK_SIZE - sizeof(arena::memory_block_header));
+  //
+  // We allocated 2 * HYPERBLOCK_SIZE worth of address space but we're only going to use 1, aligned on a
+  // HYPERBLOCK_SIZE boundary. This is so we can get the start of the hyperblock by masking any address within it.
+  // We don't worry about unused address space either side of our aligned address space because there will be no
+  // memory mapped to it.
+  //
+  current_addr_ptr = reinterpret_cast<char *>(
+      std::align(HYPERBLOCK_SIZE, HYPERBLOCK_SIZE, addr, request));
+  //
+  // We put a memory_block_header at the beginning so we can identify the semispace id a pointer
+  // belongs to by masking off the low bits to access this memory_block_header.
+  //
+  auto *header = reinterpret_cast<memory_block_header *>(current_addr_ptr);
+  header->semispace = allocation_semispace_id;
+  allocation_ptr = current_addr_ptr + sizeof(arena::memory_block_header);
+  //
+  // We set the tripwire for this space so we trigger a garbage collection when we pass BLOCK_SIZE bytes
+  // of memory allocated from this space.
+  //
+  tripwire = current_addr_ptr + BLOCK_SIZE;
+  num_blocks = 1;
 }
diff --git a/runtime/collect/collect.cpp b/runtime/collect/collect.cpp
index cc596d205..c439fdd0e 100644
--- a/runtime/collect/collect.cpp
+++ b/runtime/collect/collect.cpp
@@ -316,22 +316,7 @@ void kore_collect(
   if (collect_old || !previous_oldspace_alloc_ptr) {
     scan_ptr = oldspace_ptr();
   } else {
-    if (MEM_BLOCK_START(previous_oldspace_alloc_ptr + 1)
-        == previous_oldspace_alloc_ptr) {
-      // this means that the previous oldspace allocation pointer points to an
-      // address that is megabyte-aligned. This can only happen if we have just
-      // filled up a block but have not yet allocated the next block in the
-      // sequence at the start of the collection cycle. This means that the
-      // allocation pointer is invalid and does not actually point to the next
-      // address that would have been allocated at, according to the logic of
-      // kore_arena_alloc, which will have allocated a fresh memory block and put
-      // the allocation at the start of it. Thus, we use arena::move_ptr with a size
-      // of zero to adjust and get the true address of the allocation.
-      scan_ptr
-          = arena::move_ptr(previous_oldspace_alloc_ptr, 0, *old_alloc_ptr());
-    } else {
-      scan_ptr = previous_oldspace_alloc_ptr;
-    }
+    scan_ptr = previous_oldspace_alloc_ptr;
   }
   if (scan_ptr != *old_alloc_ptr()) {
     MEM_LOG("Evacuating old generation\n");
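
A note on scale, separate from the patch itself: HYPERBLOCK_SIZE is BLOCK_SIZE * 1024 * 1024, so with a 1 MiB BLOCK_SIZE (the value the "1MB blocks" comments above suggest; the real constant comes from runtime/alloc.h) each semispace reserves 1 TiB of address space up front, none of it backed by physical memory until it is touched. A minimal sketch under that assumed BLOCK_SIZE:

    #include <cstddef>
    #include <cstdio>

    // Assumption: BLOCK_SIZE is 1 MiB, as the "1MB blocks" comments suggest;
    // the real value is defined in runtime/alloc.h.
    size_t const BLOCK_SIZE = size_t{1} << 20;
    size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;

    int main() {
      // With a 1 MiB block, each semispace reserves 1024 * 1024 blocks,
      // i.e. 1 TiB of address space, mapped lazily on first touch.
      std::printf(
          "hyperblock: %zu bytes (%zu GiB)\n", HYPERBLOCK_SIZE,
          HYPERBLOCK_SIZE >> 30);
    }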
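
The per-pointer semispace lookup in mem_block_header() depends only on each hyperblock being HYPERBLOCK_SIZE-aligned; the "- 1" in the mask keeps a one-past-the-end pointer inside the hyperblock it was allocated from. A self-contained sketch of just that arithmetic, using a hypothetical 1 TiB hyperblock and a made-up aligned base address:

    #include <cstdint>
    #include <cstdio>

    // Assumption: a 1 TiB hyperblock (1 MiB BLOCK_SIZE); the base is a made-up,
    // HYPERBLOCK_SIZE-aligned value used purely to exercise the arithmetic.
    uintptr_t const HYPERBLOCK_SIZE = (uintptr_t)1 << 40;

    uintptr_t hyperblock_base(uintptr_t addr) {
      // Same mask as arena::mem_block_header(): the "- 1" keeps a
      // one-past-the-end pointer in the hyperblock it came from.
      return (addr - 1) & ~(HYPERBLOCK_SIZE - 1);
    }

    int main() {
      uintptr_t base = HYPERBLOCK_SIZE * 42; // hypothetical aligned base
      std::printf("%d\n", hyperblock_base(base + 1) == base);                   // 1
      std::printf("%d\n", hyperblock_base(base + HYPERBLOCK_SIZE) == base);     // 1: end pointer
      std::printf("%d\n", hyperblock_base(base + HYPERBLOCK_SIZE + 1) == base); // 0: next hyperblock
    }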
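
With a single contiguous semispace, kore_arena_alloc() above never fails and has no slow path: crossing the tripwire only sets time_for_collection and then pushes the tripwire to the end of the hyperblock so, as the comment in the patch notes, it will not trigger again until the next swap or clear. A toy model of that control flow, with made-up sizes and none of the patch's real types:

    #include <cstddef>
    #include <cstdio>

    // Toy stand-in for the tripwire scheme; 16 bytes of "address space" and an
    // 8-byte "block" are invented numbers for the demo.
    struct toy_arena {
      static size_t const SPACE = 16;
      char space[SPACE];
      char *allocation_ptr = space;
      char *tripwire = space + 8; // pretend one block is 8 bytes
      bool time_for_collection = false;

      void *alloc(size_t requested) {
        if (allocation_ptr + requested >= tripwire) {
          time_for_collection = true; // ask for a collection...
          tripwire = space + SPACE;   // ...and don't ask again until a swap
        }
        void *result = allocation_ptr; // the bump allocation always succeeds
        allocation_ptr += requested;
        return result;
      }
    };

    int main() {
      toy_arena a;
      a.alloc(4);
      std::printf("after 4 bytes: gc requested = %d\n", a.time_for_collection);  // 0
      a.alloc(6);
      std::printf("after 10 bytes: gc requested = %d\n", a.time_for_collection); // 1
    }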
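
initialize_semispace() reserves twice the address space it needs so that a HYPERBLOCK_SIZE-aligned window of HYPERBLOCK_SIZE bytes is guaranteed to exist inside the reservation. A self-contained demo of the same reserve-then-align idea, shrunk to a hypothetical 1 GiB hyperblock so it is cheap to run:

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <memory>
    #include <sys/mman.h>

    int main() {
      size_t const N = size_t{1} << 30; // hypothetical hyperblock size (1 GiB)
      size_t request = 2 * N;
      // Reserve address space only: an anonymous MAP_NORESERVE mapping means
      // physical pages are provided lazily on first touch.
      void *addr = mmap(
          nullptr, request, PROT_READ | PROT_WRITE,
          MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
      if (addr == MAP_FAILED) {
        perror("mmap()");
        return EXIT_FAILURE;
      }
      void *reservation = addr; // keep the original base for munmap()
      // Asking for 2 * N guarantees an N-aligned window of N bytes exists in the
      // reservation; std::align finds it (and updates addr/request in place).
      void *aligned = std::align(N, N, addr, request);
      std::printf("reserved at %p, aligned base at %p\n", reservation, aligned);
      // Any pointer p in [aligned, aligned + N) satisfies
      // ((uintptr_t)p & ~(N - 1)) == (uintptr_t)aligned, which is what makes
      // the header lookup by masking work.
      munmap(reservation, 2 * N);
      return EXIT_SUCCESS;
    }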
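
With a header only at the base of each hyperblock, get_arena_semispace_id_of_object() reduces to a mask plus one load, and because allocations within a semispace are now contiguous there is no per-block chain for move_ptr() to hop over, which appears to be why the megabyte-aligned special case could be deleted from collect.cpp above. A toy illustration of the lookup, using aligned_alloc and a 4 KiB stand-in for a hyperblock:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Mirrors the union in the patch: the header stores the semispace id,
    // padded to pointer size.
    union memory_block_header {
      char semispace;
      char *alignment_dummy;
    };

    int main() {
      size_t const N = 4096; // stand-in for HYPERBLOCK_SIZE
      char *base = static_cast<char *>(aligned_alloc(N, N));
      if (!base) {
        return EXIT_FAILURE;
      }
      auto *header = reinterpret_cast<memory_block_header *>(base);
      header->semispace = 1;

      // Any interior pointer maps back to the header by masking its low bits.
      char *obj = base + sizeof(memory_block_header) + 100;
      auto *found = reinterpret_cast<memory_block_header *>(
          (reinterpret_cast<uintptr_t>(obj) - 1) & ~(uintptr_t)(N - 1));
      std::printf("semispace of obj: %d\n", found->semispace); // prints 1

      free(base);
      return EXIT_SUCCESS;
    }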