diff --git a/include/runtime/arena.h b/include/runtime/arena.h
index fc72155fb..3f71445e4 100644
--- a/include/runtime/arena.h
+++ b/include/runtime/arena.h
@@ -1,38 +1,49 @@
 #ifndef ARENA_H
 #define ARENA_H
 
+#include <algorithm>
 #include <cstddef>
+#include <cstdint>
 #include <sys/types.h>
+#include <utility>
 
 #include "runtime/alloc.h"
 
 extern "C" {
 
+size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
+
 // An arena can be used to allocate objects that can then be deallocated all at
 // once.
 class arena {
 public:
   arena(char id)
-      : allocation_semispace_id(id) { }
+      : allocation_semispace_id(id) {
+    initialize_semispace();
+  }
 
   // Allocates the requested number of bytes as a contiguous region and returns a
   // pointer to the first allocated byte.
-  // If called with requested size greater than the maximun single allocation
-  // size, the space is allocated in a general (not garbage collected pool).
   void *kore_arena_alloc(size_t requested);
 
   // Returns the address of the first byte that belongs in the given arena.
   // Returns 0 if nothing has been allocated ever in that arena.
-  char *arena_start_ptr() const;
+  char *arena_start_ptr() const {
+    return current_addr_ptr ? current_addr_ptr + sizeof(memory_block_header)
+                            : nullptr;
+  }
 
   // Returns a pointer to a location holding the address of last allocated
   // byte in the given arena plus 1.
   // This address is 0 if nothing has been allocated ever in that arena.
-  char **arena_end_ptr();
+  char **arena_end_ptr() { return &allocation_ptr; }
 
   // return the total number of allocatable bytes currently in the arena in its
   // active semispace.
-  size_t arena_size() const;
+  size_t arena_size() const {
+    update_num_blocks();
+    return BLOCK_SIZE * std::max(num_blocks, num_collection_blocks);
+  }
 
   // Clears the current allocation space by setting its start back to its first
   // block. It is used during garbage collection to effectively collect all of the
@@ -41,15 +52,18 @@ class arena {
   // Resizes the last allocation as long as the resize does not require a new
   // block allocation.
-  // Returns the address of the byte following the last newlly allocated byte when
-  // the resize succeeds, returns 0 otherwise.
-  void *arena_resize_last_alloc(ssize_t increase);
+  // Returns the address of the byte following the last newly allocated byte.
+  void *arena_resize_last_alloc(ssize_t increase) {
+    return (allocation_ptr += increase);
+  }
 
   // Returns the given arena's current collection semispace ID.
   // Each arena has 2 semispace IDs one equal to the arena ID and the other equal
   // to the 1's complement of the arena ID. At any time one of these semispaces
   // is used for allocation and the other is used for collection.
-  char get_arena_collection_semispace_id() const;
+  char get_arena_collection_semispace_id() const {
+    return ~allocation_semispace_id;
+  }
 
   // Exchanges the current allocation and collection semispaces and clears the new
   // current allocation semispace by setting its start back to its first block.
@@ -61,7 +75,7 @@ class arena {
   // by the blocks of that arena. This difference will include blocks containing
   // sentinel bytes. Undefined behavior will result if the pointers belong to
   // different arenas.
-  static ssize_t ptr_diff(char *ptr1, char *ptr2);
+  static ssize_t ptr_diff(char *ptr1, char *ptr2) { return ptr1 - ptr2; }
 
   // Given a starting pointer to an address allocated in an arena and a size in
   // bytes, this function returns a pointer to an address allocated in the
@@ -72,7 +86,10 @@ class arena {
   // 3rd argument: the address of last allocated byte in the arena plus 1
   // Return value: the address allocated in the arena after size bytes from the
   // starting pointer, or 0 if this is equal to the 3rd argument.
-  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr);
+  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr) {
+    char *next_ptr = ptr + size;
+    return (next_ptr == arena_end_ptr) ? 0 : next_ptr;
+  }
 
   // Returns the ID of the semispace where the given address was allocated.
   // The behavior is undefined if called with an address that has not been
@@ -80,34 +97,59 @@ class arena {
   static char get_arena_semispace_id_of_object(void *ptr);
 
 private:
-  struct memory_block_header {
-    char *next_block;
+  union memory_block_header {
+    //
+    // Currently the header just holds the semispace id. But we need it to be a
+    // multiple of sizeof(char*) for alignment purposes so we add a dummy char*.
+    //
     char semispace;
+    char *alignment_dummy;
   };
 
-  void fresh_block();
-  static memory_block_header *mem_block_header(void *ptr);
+  //
+  // We update the number of 1MB blocks actually written to only when we need
+  // this value or before a garbage collection, rather than trying to determine
+  // when we write to a fresh block.
+  //
+  void update_num_blocks() const {
+    //
+    // Calculate how many 1M blocks of the current arena we used.
+    //
+    size_t num_used_blocks
+        = (allocation_ptr - current_addr_ptr - 1) / BLOCK_SIZE + 1;
+    if (num_used_blocks > num_blocks)
+      num_blocks = num_used_blocks;
+  }
+
+  void initialize_semispace();
 
-  // helper function for `kore_arena_alloc`. Do not call directly.
-  void *do_alloc_slow(size_t requested);
+  static memory_block_header *mem_block_header(void *ptr) {
+    uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
+    return reinterpret_cast<memory_block_header *>(
+        (address - 1) & ~(HYPERBLOCK_SIZE - 1));
+  }
 
-  char *first_block; // beginning of first block
-  char *block; // where allocations are being made in current block
-  char *block_start; // start of current block
-  char *block_end; // 1 past end of current block
-  char *first_collection_block; // beginning of other semispace
-  size_t num_blocks; // number of blocks in current semispace
-  size_t num_collection_blocks; // number of blocks in other semispace
+  //
+  // Current semispace where allocations are being made.
+  //
+  char *current_addr_ptr; // pointer to start of current address space
+  char *allocation_ptr; // next available location in current semispace
+  char *tripwire; // allocating past this requests a garbage collection
+  mutable size_t
+      num_blocks; // notional number of BLOCK_SIZE blocks in current semispace
   char allocation_semispace_id; // id of current semispace
+  //
+  // Semispace where allocations will be made during and after garbage collection.
+  //
+  char *collection_addr_ptr
+      = nullptr; // pointer to start of collection address space
+  size_t num_collection_blocks
+      = 0; // notional number of BLOCK_SIZE blocks in collection semispace
 };
 
 // Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
 // 127.
 #define REGISTER_ARENA(name, id) static thread_local arena name(id)
 
-#define MEM_BLOCK_START(ptr) \
-  ((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))
-
 #ifdef __MACH__
 //
 // thread_local disabled for Apple
 //
@@ -120,16 +162,51 @@ extern thread_local bool time_for_collection;
 
 size_t get_gc_threshold();
 
 inline void *arena::kore_arena_alloc(size_t requested) {
-  if (block + requested > block_end) {
-    return do_alloc_slow(requested);
+  if (allocation_ptr + requested >= tripwire) {
+    //
+    // We got close to or past the last location accessed in this address range
+    // so far, depending on the requested size and tripwire setting. This
+    // triggers a garbage collection when allowed.
+    //
+    time_for_collection = true;
+    tripwire = current_addr_ptr
+               + HYPERBLOCK_SIZE; // won't trigger again until arena swap
   }
-  void *result = block;
-  block += requested;
+  void *result = allocation_ptr;
+  allocation_ptr += requested;
   MEM_LOG(
-      "Allocation at %p (size %zd), next alloc at %p (if it fits)\n", result,
-      requested, block);
+      "Allocation at %p (size %zd), next alloc at %p\n", result, requested,
+      allocation_ptr);
   return result;
 }
+
+inline void arena::arena_clear() {
+  //
+  // We set the allocation pointer to the first available address.
+  //
+  allocation_ptr = arena_start_ptr();
+  //
+  // If the number of blocks we've touched is >= threshold, we want to trigger
+  // a garbage collection if we get within 1 block of the end of this area.
+  // Otherwise we only want to trigger a garbage collection if we allocate off
+  // the end of this area.
+  //
+  tripwire = current_addr_ptr
+             + (num_blocks - (num_blocks >= get_gc_threshold())) * BLOCK_SIZE;
 }
+inline void arena::arena_swap_and_clear() {
+  update_num_blocks(); // so we save the correct number of touched blocks
+  std::swap(current_addr_ptr, collection_addr_ptr);
+  std::swap(num_blocks, num_collection_blocks);
+  allocation_semispace_id = ~allocation_semispace_id;
+  if (current_addr_ptr == nullptr) {
+    //
+    // The other semispace hasn't been initialized yet.
+    //
+    initialize_semispace();
+  } else
+    arena_clear();
+}
+}
 #endif // ARENA_H
diff --git a/runtime/alloc/arena.cpp b/runtime/alloc/arena.cpp
index c4384642e..9fbde8bf9 100644
--- a/runtime/alloc/arena.cpp
+++ b/runtime/alloc/arena.cpp
@@ -1,4 +1,3 @@
-
 #include
 #include
 #include
@@ -12,226 +11,58 @@ extern size_t const VAR_BLOCK_SIZE = BLOCK_SIZE;
 
-__attribute__((always_inline)) arena::memory_block_header *
-arena::mem_block_header(void *ptr) {
-  // NOLINTNEXTLINE(*-reinterpret-cast)
-  return reinterpret_cast<arena::memory_block_header *>(
-      ((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1));
-}
-
-__attribute__((always_inline)) char
-arena::get_arena_collection_semispace_id() const {
-  return ~allocation_semispace_id;
-}
-
 __attribute__((always_inline)) char
 arena::get_arena_semispace_id_of_object(void *ptr) {
   return mem_block_header(ptr)->semispace;
 }
 
-//
-// We will reserve enough address space for 1 million 1MB blocks. Might want to increase this on a > 1TB server.
-//
-size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
-
-static void *megabyte_malloc() {
-  //
-  // Return pointer to a BLOCK_SIZE chunk of memory with BLOCK_SIZE alignment.
-  //
-  static thread_local char *currentblock_ptr
-      = nullptr; // char* rather than void* to permit pointer arithmetic
-  if (currentblock_ptr) {
-    //
-    // We expect an page fault due to not being able to map physical memory to this block or the
-    // process to be killed by the OOM killer long before we run off the end of our address space.
-    //
-    currentblock_ptr += BLOCK_SIZE;
-  } else {
-    //
-    // First call - need to reserve the address space.
-    //
-    size_t request = HYPERBLOCK_SIZE;
-    void *addr = mmap(
-        nullptr, // let OS choose the address
-        request, // Linux and MacOS both allow up to 64TB
-        PROT_READ | PROT_WRITE, // read, write but not execute
-        MAP_ANONYMOUS | MAP_PRIVATE
-            | MAP_NORESERVE, // allocate address space only
-        -1, // no file backing
-        0); // no offset
-    if (addr == MAP_FAILED) {
-      perror("mmap()");
-      abort();
-    }
-    //
-    // We ask for one block worth of address space less than we allocated so alignment will always succeed.
-    // We don't worry about unused address space either side of our aligned address space because there will be no
-    // memory mapped to it.
-    //
-    currentblock_ptr = reinterpret_cast<char *>(
-        std::align(BLOCK_SIZE, HYPERBLOCK_SIZE - BLOCK_SIZE, addr, request));
-  }
-  return currentblock_ptr;
-}
-
-#ifdef __MACH__
-//
-// thread_local disabled for Apple
-//
-bool time_for_collection;
-#else
-thread_local bool time_for_collection;
-#endif
-
-void arena::fresh_block() {
-  char *next_block = nullptr;
-  if (block_start == nullptr) {
-    next_block = (char *)megabyte_malloc();
-    first_block = next_block;
-    auto *next_header = (arena::memory_block_header *)next_block;
-    next_header->next_block = nullptr;
-    next_header->semispace = allocation_semispace_id;
-    num_blocks++;
-  } else {
-    next_block = *(char **)block_start;
-    if (block != block_end) {
-      if (block_end - block == 8) {
-        *(uint64_t *)block = NOT_YOUNG_OBJECT_BIT; // 8 bit sentinel value
-      } else {
-        *(uint64_t *)block
-            = block_end - block - 8; // 16-bit or more sentinel value
-      }
-    }
-    if (!next_block) {
-      MEM_LOG(
-          "Allocating new block for the first time in arena %d\n",
-          allocation_semispace_id);
-      next_block = (char *)megabyte_malloc();
-      *(char **)block_start = next_block;
-      auto *next_header = (arena::memory_block_header *)next_block;
-      next_header->next_block = nullptr;
-      next_header->semispace = allocation_semispace_id;
-      num_blocks++;
-      time_for_collection = true;
-    }
-  }
-  if (!*(char **)next_block && num_blocks >= get_gc_threshold()) {
-    time_for_collection = true;
-  }
-  block = next_block + sizeof(arena::memory_block_header);
-  block_start = next_block;
-  block_end = next_block + BLOCK_SIZE;
-  MEM_LOG(
-      "New block at %p (remaining %zd)\n", block,
-      BLOCK_SIZE - sizeof(arena::memory_block_header));
-}
-
 #ifdef __MACH__
 //
 // thread_local disabled for Apple
 //
+bool time_for_collection = false;
 bool gc_enabled = true;
 #else
+thread_local bool time_for_collection = false;
 thread_local bool gc_enabled = true;
 #endif
 
-__attribute__((noinline)) void *arena::do_alloc_slow(size_t requested) {
-  MEM_LOG(
-      "Block at %p too small, %zd remaining but %zd needed\n", block,
-      block_end - block, requested);
-  if (requested > BLOCK_SIZE - sizeof(arena::memory_block_header)) {
-    return malloc(requested);
-  }
-  fresh_block();
-  void *result = block;
-  block += requested;
-  MEM_LOG(
-      "Allocation at %p (size %zd), next alloc at %p (if it fits)\n", result,
-      requested, block);
-  return result;
-}
-
-__attribute__((always_inline)) void *
-arena::arena_resize_last_alloc(ssize_t increase) {
-  if (block + increase <= block_end) {
-    block += increase;
-    return block;
-  }
-  return nullptr;
-}
-
-__attribute__((always_inline)) void arena::arena_swap_and_clear() {
-  char *tmp = first_block;
-  first_block = first_collection_block;
-  first_collection_block = tmp;
-  size_t tmp2 = num_blocks;
-  num_blocks = num_collection_blocks;
-  num_collection_blocks = tmp2;
-  allocation_semispace_id = ~allocation_semispace_id;
-  arena_clear();
-}
-
-__attribute__((always_inline)) void arena::arena_clear() {
-  block = first_block ? first_block + sizeof(arena::memory_block_header)
-                      : nullptr;
-  block_start = first_block;
-  block_end = first_block ? first_block + BLOCK_SIZE : nullptr;
-}
-
-__attribute__((always_inline)) char *arena::arena_start_ptr() const {
-  return first_block ? first_block + sizeof(arena::memory_block_header)
-                     : nullptr;
-}
-
-__attribute__((always_inline)) char **arena::arena_end_ptr() {
-  return &block;
-}
-
-char *arena::move_ptr(char *ptr, size_t size, char const *arena_end_ptr) {
-  char *next_ptr = ptr + size;
-  if (next_ptr == arena_end_ptr) {
-    return nullptr;
-  }
-  if (next_ptr != MEM_BLOCK_START(ptr) + BLOCK_SIZE) {
-    return next_ptr;
-  }
-  char *next_block = *(char **)MEM_BLOCK_START(ptr);
-  if (!next_block) {
-    return nullptr;
-  }
-  return next_block + sizeof(arena::memory_block_header);
-}
-
-ssize_t arena::ptr_diff(char *ptr1, char *ptr2) {
-  if (MEM_BLOCK_START(ptr1) == MEM_BLOCK_START(ptr2)) {
-    return ptr1 - ptr2;
-  }
-  arena::memory_block_header *hdr = mem_block_header(ptr2);
-  ssize_t result = 0;
-  while (hdr != mem_block_header(ptr1) && hdr->next_block) {
-    if (ptr2) {
-      result += ((char *)hdr + BLOCK_SIZE) - ptr2;
-      ptr2 = nullptr;
-    } else {
-      result += (BLOCK_SIZE - sizeof(arena::memory_block_header));
-    }
-    hdr = (arena::memory_block_header *)hdr->next_block;
+void arena::initialize_semispace() {
+  //
+  // Current semispace is uninitialized so mmap() a big chunk of address space.
+  //
+  size_t request = 2 * HYPERBLOCK_SIZE;
+  void *addr = mmap(
+      nullptr, // let OS choose the address
+      request, // Linux and MacOS both allow up to 64TB
+      PROT_READ | PROT_WRITE, // read, write but not execute
+      MAP_ANONYMOUS | MAP_PRIVATE
+          | MAP_NORESERVE, // allocate address space only
+      -1, // no file backing
+      0); // no offset
+  if (addr == MAP_FAILED) {
+    perror("mmap()");
+    abort();
   }
-  if (hdr == mem_block_header(ptr1)) {
-    result += ptr1 - (char *)(hdr + 1);
-    return result;
-  } // reached the end of the arena and didn't find the block
-  // it's possible that the result should be negative, in which
-  // case the block will have been prior to the block we started
-  // at. To handle this, we recurse with reversed arguments and
-  // negate the result. This means that the code might not
-  // terminate if the two pointers do not belong to the same
-  // arena.
-  return -ptr_diff(ptr2, ptr1);
-}
-
-size_t arena::arena_size() const {
-  return (num_blocks > num_collection_blocks ? num_blocks
-                                             : num_collection_blocks)
-         * (BLOCK_SIZE - sizeof(arena::memory_block_header));
+  //
+  // We allocated 2 * HYPERBLOCK_SIZE worth of address space but we're only going to use 1, aligned on a
+  // HYPERBLOCK_SIZE boundary. This is so we can get the start of the hyperblock by masking any address within it.
+  // We don't worry about unused address space either side of our aligned address space because there will be no
+  // memory mapped to it.
+  //
+  current_addr_ptr = reinterpret_cast<char *>(
+      std::align(HYPERBLOCK_SIZE, HYPERBLOCK_SIZE, addr, request));
+  //
+  // We put a memory_block_header at the beginning so we can identify the semispace id a pointer
+  // belongs to by masking off the low bits to access this memory_block_header.
+  //
+  auto *header = reinterpret_cast<memory_block_header *>(current_addr_ptr);
+  header->semispace = allocation_semispace_id;
+  allocation_ptr = current_addr_ptr + sizeof(arena::memory_block_header);
+  //
+  // We set the tripwire for this space so we trigger a garbage collection when we pass BLOCK_SIZE bytes
+  // of memory allocated from this space.
+  //
+  tripwire = current_addr_ptr + BLOCK_SIZE;
+  num_blocks = 1;
 }
diff --git a/runtime/collect/collect.cpp b/runtime/collect/collect.cpp
index cc596d205..c439fdd0e 100644
--- a/runtime/collect/collect.cpp
+++ b/runtime/collect/collect.cpp
@@ -316,22 +316,7 @@ void kore_collect(
   if (collect_old || !previous_oldspace_alloc_ptr) {
     scan_ptr = oldspace_ptr();
   } else {
-    if (MEM_BLOCK_START(previous_oldspace_alloc_ptr + 1)
-        == previous_oldspace_alloc_ptr) {
-      // this means that the previous oldspace allocation pointer points to an
-      // address that is megabyte-aligned. This can only happen if we have just
-      // filled up a block but have not yet allocated the next block in the
-      // sequence at the start of the collection cycle. This means that the
-      // allocation pointer is invalid and does not actually point to the next
-      // address that would have been allocated at, according to the logic of
-      // kore_arena_alloc, which will have allocated a fresh memory block and put
-      // the allocation at the start of it. Thus, we use arena::move_ptr with a size
-      // of zero to adjust and get the true address of the allocation.
-      scan_ptr
-          = arena::move_ptr(previous_oldspace_alloc_ptr, 0, *old_alloc_ptr());
-    } else {
-      scan_ptr = previous_oldspace_alloc_ptr;
-    }
+    scan_ptr = previous_oldspace_alloc_ptr;
   }
   if (scan_ptr != *old_alloc_ptr()) {
     MEM_LOG("Evacuating old generation\n");
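
A note on scale, separate from the patch itself: HYPERBLOCK_SIZE is BLOCK_SIZE * 1024 * 1024, so with a 1 MiB BLOCK_SIZE (the value the "1MB blocks" comments above suggest; the real constant comes from runtime/alloc.h) each semispace reserves 1 TiB of address space up front, none of it backed by physical memory until it is touched. A minimal sketch under that assumed BLOCK_SIZE:

    #include <cstddef>
    #include <cstdio>

    // Assumption: BLOCK_SIZE is 1 MiB, as the "1MB blocks" comments suggest;
    // the real value is defined in runtime/alloc.h.
    size_t const BLOCK_SIZE = size_t{1} << 20;
    size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;

    int main() {
      // With a 1 MiB block, each semispace reserves 1024 * 1024 blocks,
      // i.e. 1 TiB of address space, mapped lazily on first touch.
      std::printf(
          "hyperblock: %zu bytes (%zu GiB)\n", HYPERBLOCK_SIZE,
          HYPERBLOCK_SIZE >> 30);
    }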
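
The per-pointer semispace lookup in mem_block_header() depends only on each hyperblock being HYPERBLOCK_SIZE-aligned; the "- 1" in the mask keeps a one-past-the-end pointer inside the hyperblock it was allocated from. A self-contained sketch of just that arithmetic, using a hypothetical 1 TiB hyperblock and a made-up aligned base address:

    #include <cstdint>
    #include <cstdio>

    // Assumption: a 1 TiB hyperblock (1 MiB BLOCK_SIZE); the base is a made-up,
    // HYPERBLOCK_SIZE-aligned value used purely to exercise the arithmetic.
    uintptr_t const HYPERBLOCK_SIZE = (uintptr_t)1 << 40;

    uintptr_t hyperblock_base(uintptr_t addr) {
      // Same mask as arena::mem_block_header(): the "- 1" keeps a
      // one-past-the-end pointer in the hyperblock it came from.
      return (addr - 1) & ~(HYPERBLOCK_SIZE - 1);
    }

    int main() {
      uintptr_t base = HYPERBLOCK_SIZE * 42; // hypothetical aligned base
      std::printf("%d\n", hyperblock_base(base + 1) == base);                   // 1
      std::printf("%d\n", hyperblock_base(base + HYPERBLOCK_SIZE) == base);     // 1: end pointer
      std::printf("%d\n", hyperblock_base(base + HYPERBLOCK_SIZE + 1) == base); // 0: next hyperblock
    }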
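
With a single contiguous semispace, kore_arena_alloc() above never fails and has no slow path: crossing the tripwire only sets time_for_collection and then pushes the tripwire to the end of the hyperblock so, as the comment in the patch notes, it will not trigger again until the next swap or clear. A toy model of that control flow, with made-up sizes and none of the patch's real types:

    #include <cstddef>
    #include <cstdio>

    // Toy stand-in for the tripwire scheme; 16 bytes of "address space" and an
    // 8-byte "block" are invented numbers for the demo.
    struct toy_arena {
      static size_t const SPACE = 16;
      char space[SPACE];
      char *allocation_ptr = space;
      char *tripwire = space + 8; // pretend one block is 8 bytes
      bool time_for_collection = false;

      void *alloc(size_t requested) {
        if (allocation_ptr + requested >= tripwire) {
          time_for_collection = true; // ask for a collection...
          tripwire = space + SPACE;   // ...and don't ask again until a swap
        }
        void *result = allocation_ptr; // the bump allocation always succeeds
        allocation_ptr += requested;
        return result;
      }
    };

    int main() {
      toy_arena a;
      a.alloc(4);
      std::printf("after 4 bytes: gc requested = %d\n", a.time_for_collection);  // 0
      a.alloc(6);
      std::printf("after 10 bytes: gc requested = %d\n", a.time_for_collection); // 1
    }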
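
initialize_semispace() reserves twice the address space it needs so that a HYPERBLOCK_SIZE-aligned window of HYPERBLOCK_SIZE bytes is guaranteed to exist inside the reservation. A self-contained demo of the same reserve-then-align idea, shrunk to a hypothetical 1 GiB hyperblock so it is cheap to run:

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <memory>
    #include <sys/mman.h>

    int main() {
      size_t const N = size_t{1} << 30; // hypothetical hyperblock size (1 GiB)
      size_t request = 2 * N;
      // Reserve address space only: an anonymous MAP_NORESERVE mapping means
      // physical pages are provided lazily on first touch.
      void *addr = mmap(
          nullptr, request, PROT_READ | PROT_WRITE,
          MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
      if (addr == MAP_FAILED) {
        perror("mmap()");
        return EXIT_FAILURE;
      }
      void *reservation = addr; // keep the original base for munmap()
      // Asking for 2 * N guarantees an N-aligned window of N bytes exists in the
      // reservation; std::align finds it (and updates addr/request in place).
      void *aligned = std::align(N, N, addr, request);
      std::printf("reserved at %p, aligned base at %p\n", reservation, aligned);
      // Any pointer p in [aligned, aligned + N) satisfies
      // ((uintptr_t)p & ~(N - 1)) == (uintptr_t)aligned, which is what makes
      // the header lookup by masking work.
      munmap(reservation, 2 * N);
      return EXIT_SUCCESS;
    }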
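
With a header only at the base of each hyperblock, get_arena_semispace_id_of_object() reduces to a mask plus one load, and because allocations within a semispace are now contiguous there is no per-block chain for move_ptr() to hop over, which appears to be why the megabyte-aligned special case could be deleted from collect.cpp above. A toy illustration of the lookup, using aligned_alloc and a 4 KiB stand-in for a hyperblock:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Mirrors the union in the patch: the header stores the semispace id,
    // padded to pointer size.
    union memory_block_header {
      char semispace;
      char *alignment_dummy;
    };

    int main() {
      size_t const N = 4096; // stand-in for HYPERBLOCK_SIZE
      char *base = static_cast<char *>(aligned_alloc(N, N));
      if (!base) {
        return EXIT_FAILURE;
      }
      auto *header = reinterpret_cast<memory_block_header *>(base);
      header->semispace = 1;

      // Any interior pointer maps back to the header by masking its low bits.
      char *obj = base + sizeof(memory_block_header) + 100;
      auto *found = reinterpret_cast<memory_block_header *>(
          (reinterpret_cast<uintptr_t>(obj) - 1) & ~(uintptr_t)(N - 1));
      std::printf("semispace of obj: %d\n", found->semispace); // prints 1

      free(base);
      return EXIT_SUCCESS;
    }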