diff --git a/arbor/backends/event.hpp b/arbor/backends/event.hpp
index 10249aa34c..7fd9eb8491 100644
--- a/arbor/backends/event.hpp
+++ b/arbor/backends/event.hpp
@@ -4,7 +4,6 @@
 #include <arbor/fvm_types.hpp>
 #include <arbor/serdes.hpp>
 #include <arbor/mechanism_abi.h>
-#include <arbor/generic_event.hpp>
 
 // Structures for the representation of event delivery targets and
 // staged events.
@@ -46,9 +45,6 @@ struct deliverable_event {
     ARB_SERDES_ENABLE(deliverable_event, time, weight, handle);
 };
 
-template<>
-struct has_event_index<deliverable_event> : public std::true_type {};
-
 // Subset of event information required for mechanism delivery.
 struct deliverable_event_data {
     cell_local_size_type mech_index; // same as target_handle::mech_index
@@ -61,11 +57,6 @@ struct deliverable_event_data {
                       weight);
 };
 
-// Stream index accessor function for multi_event_stream:
-inline cell_local_size_type event_index(const arb_deliverable_event_data& ed) {
-    return ed.mech_index;
-}
-
 // Delivery data accessor function for multi_event_stream:
 inline arb_deliverable_event_data event_data(const deliverable_event& ev) {
     return {ev.handle.mech_index, ev.weight};
diff --git a/arbor/backends/event_stream_base.hpp b/arbor/backends/event_stream_base.hpp
index f825b503f6..a81254a781 100644
--- a/arbor/backends/event_stream_base.hpp
+++ b/arbor/backends/event_stream_base.hpp
@@ -2,10 +2,8 @@
 
 #include <vector>
 
-#include <arbor/generic_event.hpp>
 #include <arbor/mechanism_abi.h>
 
-
 #include "backends/event.hpp"
 #include "backends/event_stream_state.hpp"
 #include "event_lane.hpp"
@@ -18,15 +16,13 @@ namespace arb {
 
 template <typename Event>
 struct event_stream_base {
-    using size_type = std::size_t;
     using event_type = Event;
-    using event_time_type = ::arb::event_time_type<Event>;
-    using event_data_type = ::arb::event_data_type<Event>;
+    using event_data_type = decltype(event_data(std::declval<Event>()));
 
 protected: // members
     std::vector<event_data_type> ev_data_;
     std::vector<std::size_t> ev_spans_ = {0};
-    size_type index_ = 0;
+    std::size_t index_ = 0;
     event_data_type* base_ptr_ = nullptr;
 
 public:
@@ -62,24 +58,32 @@ struct event_stream_base {
         index_ = 0;
     }
 
-    // Construct a mapping of mech_id to a stream s.t. streams are partitioned into
-    // time step buckets by `ev_span`
+  protected:
+    // backend specific initializations
+    virtual void init() = 0;
+};
+
+struct spike_event_stream_base : event_stream_base<deliverable_event> {
     template<typename EventStream>
-    static std::enable_if_t<std::is_base_of_v<event_stream_base, EventStream>>
-    multi_event_stream(const event_lane_subrange& lanes,
-                       const std::vector<target_handle>& handles,
-                       const std::vector<std::size_t>& divs,
-                       const timestep_range& steps,
-                       std::unordered_map<unsigned, EventStream>& streams) {
+    friend void initialize(const event_lane_subrange& lanes,
+                           const std::vector<target_handle>& handles,
+                           const std::vector<std::size_t>& divs,
+                           const timestep_range& steps,
+                           std::unordered_map<unsigned, EventStream>& streams) {
         arb_assert(lanes.size() < divs.size());
 
+        // reset streams and allocate sufficient space for temporaries
         auto n_steps = steps.size();
-        std::unordered_map<unsigned, std::vector<std::size_t>> dt_sizes;
         for (auto& [k, v]: streams) {
             v.clear();
-            dt_sizes[k].resize(n_steps, 0);
+            v.spike_counter_.clear();
+            v.spike_counter_.resize(steps.size(), 0);
+            v.spikes_.clear();
+            // ev_data_ has been cleared during v.clear(), so we use its capacity
+            v.spikes_.reserve(v.ev_data_.capacity());
         }
 
+        // loop over lanes: group events by mechanism and sort them by time
         auto cell = 0;
         for (const auto& lane: lanes) {
             auto div = divs[cell];
@@ -94,16 +98,71 @@ struct event_stream_base {
                 if (step >= n_steps) break;
                 arb_assert(div + target < handles.size());
                 const auto& handle = handles[div + target];
-                streams[handle.mech_id].ev_data_.push_back({handle.mech_index, weight});
-                dt_sizes[handle.mech_id][step]++;
+                auto& stream = streams[handle.mech_id];
+                stream.spikes_.push_back(spike_data{step, handle.mech_index, time, weight});
+                // insertion sort with last element as pivot
+                // ordering: first w.r.t. step, within a step: mech_index, within a mech_index: time
+                auto first = stream.spikes_.begin();
+                auto last = stream.spikes_.end();
+                auto pivot = std::prev(last, 1);
+                std::rotate(std::upper_bound(first, pivot, *pivot), pivot, last);
+                // increment count in current time interval
+                stream.spike_counter_[step]++;
             }
         }
 
         for (auto& [id, stream]: streams) {
-            util::make_partition(stream.ev_spans_, dt_sizes[id]);
-            stream.init();
+            // copy temporary deliverable_events into stream's ev_data_
+            stream.ev_data_.reserve(stream.spikes_.size());
+            std::transform(stream.spikes_.begin(), stream.spikes_.end(), std::back_inserter(stream.ev_data_),
+                [](auto const& e) noexcept -> arb_deliverable_event_data {
+                return {e.mech_index, e.weight}; });
+            // scan over spike_counter_ and  written to ev_spans_
+            util::make_partition(stream.ev_spans_, stream.spike_counter_);
+            // delegate to derived class init: static cast necessary to access protected init()
+            static_cast<spike_event_stream_base&>(stream).init();
         }
     }
+
+  protected: // members
+    struct spike_data {
+        arb_size_type step = 0;
+        cell_local_size_type mech_index = 0;
+        time_type time = 0;
+        float weight = 0;
+        auto operator<=>(spike_data const&) const noexcept = default;
+    };
+    std::vector<spike_data> spikes_;
+    std::vector<std::size_t> spike_counter_;
+};
+
+struct sample_event_stream_base : event_stream_base<sample_event> {
+    friend void initialize(const std::vector<std::vector<sample_event>>& staged,
+                           sample_event_stream_base& stream) {
+        // clear previous data
+        stream.clear();
+
+        // return if there are no timestep bins
+        if (!staged.size()) return;
+
+        // return if there are no events
+        auto num_events = util::sum_by(staged, [] (const auto& v) {return v.size();});
+        if (!num_events) return;
+
+        // allocate space for spans and data
+        stream.ev_spans_.reserve(staged.size() + 1);
+        stream.ev_data_.reserve(num_events);
+
+        // add event data and spans
+        for (const auto& v : staged) {
+            for (const auto& ev: v) stream.ev_data_.push_back(ev.raw);
+            stream.ev_spans_.push_back(stream.ev_data_.size());
+        }
+
+        arb_assert(num_events == stream.ev_data_.size());
+        arb_assert(staged.size() + 1 == stream.ev_spans_.size());
+        stream.init();
+    }
 };
 
 } // namespace arb
diff --git a/arbor/backends/gpu/event_stream.hpp b/arbor/backends/gpu/event_stream.hpp
index 0045d93813..e332bedbc0 100644
--- a/arbor/backends/gpu/event_stream.hpp
+++ b/arbor/backends/gpu/event_stream.hpp
@@ -2,113 +2,33 @@
 
 // Indexed collection of pop-only event queues --- CUDA back-end implementation.
 
-#include <arbor/mechanism_abi.h>
-
 #include "backends/event_stream_base.hpp"
-#include "util/transform.hpp"
-#include "threading/threading.hpp"
-#include "timestep_range.hpp"
 #include "memory/memory.hpp"
 
 namespace arb {
 namespace gpu {
 
-template <typename Event>
-struct event_stream: public event_stream_base<Event> {
-public:
-    using base = event_stream_base<Event>;
-    using size_type = typename base::size_type;
-    using event_data_type = typename base::event_data_type;
-    using device_array = memory::device_vector<event_data_type>;
-
-    using base::clear;
-    using base::ev_data_;
-    using base::ev_spans_;
-    using base::base_ptr_;
-
-    event_stream() = default;
-    event_stream(task_system_handle t): base(), thread_pool_{t} {}
-
-    // Initialize event streams from a vector of vector of events
-    // Outer vector represents time step bins
-    void init(const std::vector<std::vector<Event>>& staged) {
-        // clear previous data
-        clear();
-
-        // return if there are no timestep bins
-        if (!staged.size()) return;
-
-        // return if there are no events
-        const size_type num_events = util::sum_by(staged, [] (const auto& v) {return v.size();});
-        if (!num_events) return;
-
-        // allocate space for spans and data
-        ev_spans_.resize(staged.size() + 1);
-        ev_data_.resize(num_events);
-        resize(device_ev_data_, num_events);
-
-        // compute offsets by exclusive scan over staged events
-        util::make_partition(ev_spans_,
-                             util::transform_view(staged, [](const auto& v) { return v.size(); }),
-                             0ull);
-
-        // assign, copy to device (and potentially sort) the event data in parallel
-        arb_assert(thread_pool_);
-        arb_assert(ev_spans_.size() == staged.size() + 1);
-        threading::parallel_for::apply(0, ev_spans_.size() - 1, thread_pool_.get(),
-            [this, &staged](size_type i) {
-                const auto beg = ev_spans_[i];
-                const auto end = ev_spans_[i + 1];
-                arb_assert(end >= beg);
-                const auto len = end - beg;
-
-                auto host_span = memory::make_view(ev_data_)(beg, end);
-
-                // make event data and copy
-                std::copy_n(util::transform_view(staged[i],
-                                                 [](const auto& x) { return event_data(x); }).begin(),
-                            len,
-                            host_span.begin());
-                // sort if necessary
-                if constexpr (has_event_index<Event>::value) {
-                    util::stable_sort_by(host_span,
-                                         [](const event_data_type& ed) { return event_index(ed); });
-                }
-                // copy to device
-                auto device_span = memory::make_view(device_ev_data_)(beg, end);
-                memory::copy_async(host_span, device_span);
-            });
-
-        base_ptr_ = device_ev_data_.data();
+template<typename BaseEventStream>
+struct event_stream : BaseEventStream {
+  public:
+    ARB_SERDES_ENABLE(event_stream<BaseEventStream>,
+                      ev_data_,
+                      ev_spans_,
+                      device_ev_data_,
+                      index_);
 
-        arb_assert(num_events == device_ev_data_.size());
-        arb_assert(num_events == ev_data_.size());
+  protected:
+    void init() override final {
+        resize(this->device_ev_data_, this->device_ev_data_.size());
+        memory::copy_async(this->ev_data_, this->device_ev_data_);
+        this->base_ptr_ = this->device_ev_data_.data();
     }
 
-    // Initialize event stream assuming ev_data_ and ev_span_ has
-    // been set previously (e.g. by `base::multi_event_stream`)
-    void init() {
-        resize(device_ev_data_, ev_data_.size());
-        base_ptr_ = device_ev_data_.data();
-
-        threading::parallel_for::apply(0, ev_spans_.size() - 1, thread_pool_.get(),
-           [this](size_type i) {
-               const auto beg = ev_spans_[i];
-               const auto end = ev_spans_[i + 1];
-               arb_assert(end >= beg);
-
-               auto host_span = memory::make_view(ev_data_)(beg, end);
-               auto device_span = memory::make_view(device_ev_data_)(beg, end);
+  private: // device memory
+    using event_data_type = typename BaseEventStream::event_data_type;
+    using device_array = memory::device_vector<event_data_type>;
 
-               // sort if necessary
-               if constexpr (has_event_index<Event>::value) {
-                   util::stable_sort_by(host_span,
-                                        [](const event_data_type& ed) { return event_index(ed); });
-               }
-               // copy to device
-               memory::copy_async(host_span, device_span);
-           });
-    }
+    device_array device_ev_data_;
 
     template<typename D>
     static void resize(D& d, std::size_t size) {
@@ -117,16 +37,10 @@ struct event_stream: public event_stream_base<Event> {
             d = D(size);
         }
     }
-
-    ARB_SERDES_ENABLE(event_stream<Event>,
-                      ev_data_,
-                      ev_spans_,
-                      device_ev_data_,
-                      index_);
-
-    task_system_handle thread_pool_;
-    device_array device_ev_data_;
 };
 
+using spike_event_stream = event_stream<spike_event_stream_base>;
+using sample_event_stream = event_stream<sample_event_stream_base>;
+
 } // namespace gpu
 } // namespace arb
diff --git a/arbor/backends/gpu/fvm.hpp b/arbor/backends/gpu/fvm.hpp
index 37023c4f40..96de41daba 100644
--- a/arbor/backends/gpu/fvm.hpp
+++ b/arbor/backends/gpu/fvm.hpp
@@ -37,8 +37,6 @@ struct backend {
     using threshold_watcher        = arb::gpu::threshold_watcher;
     using cable_solver             = arb::gpu::matrix_state_fine<arb_value_type, arb_index_type>;
     using diffusion_solver         = arb::gpu::diffusion_state<arb_value_type, arb_index_type>;
-    using deliverable_event_stream = arb::gpu::deliverable_event_stream;
-    using sample_event_stream      = arb::gpu::sample_event_stream;
 
     using shared_state = arb::gpu::shared_state;
     using ion_state = arb::gpu::ion_state;
diff --git a/arbor/backends/gpu/gpu_store_types.hpp b/arbor/backends/gpu/gpu_store_types.hpp
index 386f8bac7d..b8ce5369da 100644
--- a/arbor/backends/gpu/gpu_store_types.hpp
+++ b/arbor/backends/gpu/gpu_store_types.hpp
@@ -18,9 +18,6 @@ using array  = memory::device_vector<arb_value_type>;
 using iarray = memory::device_vector<arb_index_type>;
 using sarray = memory::device_vector<arb_size_type>;
 
-using deliverable_event_stream = arb::gpu::event_stream<deliverable_event>;
-using sample_event_stream = arb::gpu::event_stream<sample_event>;
-
 } // namespace gpu
 } // namespace arb
 
diff --git a/arbor/backends/gpu/shared_state.cpp b/arbor/backends/gpu/shared_state.cpp
index c6213b54af..091bcde51c 100644
--- a/arbor/backends/gpu/shared_state.cpp
+++ b/arbor/backends/gpu/shared_state.cpp
@@ -214,7 +214,7 @@ shared_state::shared_state(task_system_handle tp,
     time_since_spike(n_cell*n_detector),
     src_to_spike(make_const_view(src_to_spike_)),
     cbprng_seed(cbprng_seed_),
-    sample_events(thread_pool),
+    sample_events(),
     watcher{n_cv_, src_to_spike.data(), detector_info}
 {
     memory::fill(time_since_spike, -1.0);
@@ -262,7 +262,7 @@ void shared_state::instantiate(mechanism& m,
 
     if (storage.count(id)) throw arb::arbor_internal_error("Duplicate mech id in shared state");
     auto& store = storage.emplace(id, mech_storage{}).first->second;
-    streams[id] = deliverable_event_stream{thread_pool};
+    streams[id] = spike_event_stream{};
 
     // Allocate view pointers
     store.state_vars_ = std::vector<arb_value_type*>(m.mech_.n_state_vars);
@@ -410,14 +410,6 @@ void shared_state::take_samples() {
     }
 }
 
-void shared_state::init_events(const event_lane_subrange& lanes,
-                               const std::vector<target_handle>& handles,
-                               const std::vector<size_t>& divs,
-                               const timestep_range& dts) {
-    arb::gpu::event_stream<deliverable_event>::multi_event_stream(lanes, handles, divs, dts, streams);
-}
-
-
 // Debug interface
 ARB_ARBOR_API std::ostream& operator<<(std::ostream& o, shared_state& s) {
     using io::csv;
diff --git a/arbor/backends/gpu/shared_state.hpp b/arbor/backends/gpu/shared_state.hpp
index 792fc7476d..1ca224ae96 100644
--- a/arbor/backends/gpu/shared_state.hpp
+++ b/arbor/backends/gpu/shared_state.hpp
@@ -174,7 +174,7 @@ struct ARB_ARBOR_API shared_state: shared_state_base<shared_state, array, ion_st
     istim_state stim_data;
     std::unordered_map<std::string, ion_state> ion_data;
     std::unordered_map<unsigned, mech_storage> storage;
-    std::unordered_map<unsigned, deliverable_event_stream> streams;
+    std::unordered_map<unsigned, spike_event_stream> streams;
 
     shared_state() = default;
 
@@ -245,11 +245,6 @@ struct ARB_ARBOR_API shared_state: shared_state_base<shared_state, array, ion_st
         sample_time_host  = memory::on_host(sample_time);
         sample_value_host = memory::on_host(sample_value);
     }
-
-    void init_events(const event_lane_subrange& lanes,
-                     const std::vector<target_handle>& handles,
-                     const std::vector<size_t>& divs,
-                     const timestep_range& dts);
 };
 
 // For debugging only
diff --git a/arbor/backends/multicore/event_stream.hpp b/arbor/backends/multicore/event_stream.hpp
index f280777db9..a72d4a518c 100644
--- a/arbor/backends/multicore/event_stream.hpp
+++ b/arbor/backends/multicore/event_stream.hpp
@@ -2,61 +2,26 @@
 
 // Indexed collection of pop-only event queues --- multicore back-end implementation.
 
-#include "arbor/spike_event.hpp"
 #include "backends/event_stream_base.hpp"
-#include "timestep_range.hpp"
 
 namespace arb {
 namespace multicore {
 
-template <typename Event>
-struct event_stream: public event_stream_base<Event> {
-    using base = event_stream_base<Event>;
-    using size_type = typename base::size_type;
-
-    using base::clear;
-    using base::ev_spans_;
-    using base::ev_data_;
-    using base::base_ptr_;
-
-    event_stream() = default;
-
-    // Initialize event stream from a vector of vector of events
-    // Outer vector represents time step bins
-    void init(const std::vector<std::vector<Event>>& staged) {
-        // clear previous data
-        clear();
-
-        // return if there are no timestep bins
-        if (!staged.size()) return;
-
-        // return if there are no events
-        const size_type num_events = util::sum_by(staged, [] (const auto& v) {return v.size();});
-        if (!num_events) return;
-
-        // allocate space for spans and data
-        ev_spans_.reserve(staged.size() + 1);
-        ev_data_.reserve(num_events);
-
-        // add event data and spans
-        for (const auto& v : staged) {
-            for (const auto& ev: v) ev_data_.push_back(event_data(ev));
-            ev_spans_.push_back(ev_data_.size());
-        }
-
-        arb_assert(num_events == ev_data_.size());
-        arb_assert(staged.size() + 1 == ev_spans_.size());
-        base_ptr_ = ev_data_.data();
-    }
-
-    // Initialize event stream assuming ev_data_ and ev_span_ has
-    // been set previously (e.g. by `base::multi_event_stream`)
-    void init() { base_ptr_ = ev_data_.data(); }
-
-    ARB_SERDES_ENABLE(event_stream<Event>,
+template<typename BaseEventStream>
+struct event_stream : BaseEventStream {
+  public:
+    ARB_SERDES_ENABLE(event_stream<BaseEventStream>,
                       ev_data_,
                       ev_spans_,
                       index_);
+  protected:
+    void init() override final {
+        this->base_ptr_ = this->ev_data_.data();
+    }
 };
+
+using spike_event_stream = event_stream<spike_event_stream_base>;
+using sample_event_stream = event_stream<sample_event_stream_base>;
+
 } // namespace multicore
 } // namespace arb
diff --git a/arbor/backends/multicore/fvm.hpp b/arbor/backends/multicore/fvm.hpp
index a845a52bf2..89e7242117 100644
--- a/arbor/backends/multicore/fvm.hpp
+++ b/arbor/backends/multicore/fvm.hpp
@@ -38,8 +38,6 @@ struct backend {
     using cable_solver             = arb::multicore::cable_solver;
     using diffusion_solver         = arb::multicore::diffusion_solver;
     using threshold_watcher        = arb::multicore::threshold_watcher;
-    using deliverable_event_stream = arb::multicore::deliverable_event_stream;
-    using sample_event_stream      = arb::multicore::sample_event_stream;
     using shared_state             = arb::multicore::shared_state;
     using ion_state                = arb::multicore::ion_state;
 
diff --git a/arbor/backends/multicore/multicore_common.hpp b/arbor/backends/multicore/multicore_common.hpp
index 9186628eb0..fca714c459 100644
--- a/arbor/backends/multicore/multicore_common.hpp
+++ b/arbor/backends/multicore/multicore_common.hpp
@@ -23,9 +23,6 @@ using padded_vector = std::vector<V, util::padded_allocator<V>>;
 using array  = padded_vector<arb_value_type>;
 using iarray = padded_vector<arb_index_type>;
 
-using deliverable_event_stream = arb::multicore::event_stream<deliverable_event>;
-using sample_event_stream = arb::multicore::event_stream<sample_event>;
-
 } // namespace multicore
 } // namespace arb
 
diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
index e625c5b975..b70089c4e7 100644
--- a/arbor/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -403,7 +403,7 @@ void shared_state::instantiate(arb::mechanism& m,
     util::padded_allocator<> pad(m.data_alignment());
 
     if (storage.count(id)) throw arbor_internal_error("Duplicate mechanism id in MC shared state.");
-    streams[id] = deliverable_event_stream{};
+    streams[id] = spike_event_stream{};
     auto& store = storage[id];
     auto width = pos_data.cv.size();
     // Assign non-owning views onto shared state:
@@ -556,13 +556,5 @@ void shared_state::instantiate(arb::mechanism& m,
     }
 }
 
-void shared_state::init_events(const event_lane_subrange& lanes,
-                               const std::vector<target_handle>& handles,
-                               const std::vector<size_t>& divs,
-                               const timestep_range& dts) {
-    arb::multicore::event_stream<deliverable_event>::multi_event_stream(lanes, handles, divs, dts, streams);
-}
-
-
 } // namespace multicore
 } // namespace arb
diff --git a/arbor/backends/multicore/shared_state.hpp b/arbor/backends/multicore/shared_state.hpp
index 81a10bcc88..1bb3c59f22 100644
--- a/arbor/backends/multicore/shared_state.hpp
+++ b/arbor/backends/multicore/shared_state.hpp
@@ -180,7 +180,7 @@ struct ARB_ARBOR_API shared_state:
     istim_state stim_data;
     std::unordered_map<std::string, ion_state> ion_data;
     std::unordered_map<unsigned, mech_storage> storage;
-    std::unordered_map<unsigned, deliverable_event_stream> streams;
+    std::unordered_map<unsigned, spike_event_stream> streams;
 
     shared_state() = default;
 
@@ -250,11 +250,6 @@ struct ARB_ARBOR_API shared_state:
         sample_time_host = util::range_pointer_view(sample_time);
         sample_value_host = util::range_pointer_view(sample_value);
     }
-
-    void init_events(const event_lane_subrange& lanes,
-                     const std::vector<target_handle>& handles,
-                     const std::vector<size_t>& divs,
-                     const timestep_range& dts);
 };
 
 // For debugging only:
diff --git a/arbor/backends/shared_state_base.hpp b/arbor/backends/shared_state_base.hpp
index 92b3a5cb54..0c1742a0d5 100644
--- a/arbor/backends/shared_state_base.hpp
+++ b/arbor/backends/shared_state_base.hpp
@@ -35,14 +35,14 @@ struct shared_state_base {
                      const std::vector<size_t>& divs) {
         auto d = static_cast<D*>(this);
         // events
-        d->init_events(lanes, handles, divs, dts);
+        initialize(lanes, handles, divs, dts, d->streams);
         // samples
         auto n_samples = util::sum_by(samples, [] (const auto& s) {return s.size();});
         if (d->sample_time.size() < n_samples) {
             d->sample_time = array(n_samples);
             d->sample_value = array(n_samples);
         }
-        d->sample_events.init(samples);
+        initialize(samples, d->sample_events);
         // thresholds
         d->watcher.clear_crossings();
     }
diff --git a/arbor/include/arbor/event_generator.hpp b/arbor/include/arbor/event_generator.hpp
index cab7e5fe2b..d809fbb2d6 100644
--- a/arbor/include/arbor/event_generator.hpp
+++ b/arbor/include/arbor/event_generator.hpp
@@ -4,7 +4,6 @@
 
 #include <arbor/assert.hpp>
 #include <arbor/common_types.hpp>
-#include <arbor/generic_event.hpp>
 #include <arbor/spike_event.hpp>
 #include <arbor/schedule.hpp>
 #include <arbor/arbexcept.hpp>
diff --git a/arbor/include/arbor/generic_event.hpp b/arbor/include/arbor/generic_event.hpp
deleted file mode 100644
index 9a139a6705..0000000000
--- a/arbor/include/arbor/generic_event.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#pragma once
-
-#include <utility>
-#include <type_traits>
-
-// Generic accessors for event types used in `event_queue` and
-// `event_stream`.
-//
-// 1. event_time(const Event&):
-//
-//    Returns ordered type (typically `time_type`) representing
-//    the event time. Default implementation returns `e.time`
-//    for an event `e`.
-//
-// 2. event_index(const Event&):
-//
-//    Returns the stream index associated with the event (an
-//    unsigned index type), for use with `event_stream`.
-//    Default implementation returns `e.index` for an event `e`.
-//
-// 3. event_data(const Event&):
-//
-//    Returns the event _payload_, viz. the event data that
-//    does not include (necessarily) the time or index. This
-//    is used with `event_stream`.
-//    Default implementation returns `e.data` for an event `e`.
-//
-// The type aliases event_time_type<Event>, event_index_type<Event> and event_data_type<Event>
-// give the corresponding return types.
-//
-// The accessors act as customization points, in that they can be
-// specialized for a particular event class. In order for ADL
-// to work correctly across namespaces, the accessor functions
-// should be brought into scope with a `using` declaration.
-//
-// Example use:
-//
-// template <typename Event>
-// bool is_before(const Event& a, const Event& b) {
-//     using ::arb::event_time;
-//     return event_time(a)<event_time(b);
-// }
-
-namespace arb {
-
-template <typename Event>
-auto event_time(const Event& ev) {
-    return ev.time;
-}
-
-struct event_time_less {
-    template <typename T, typename Event, typename = std::enable_if_t<std::is_floating_point<T>::value>>
-    bool operator() (T l, const Event& r) {
-        return l<event_time(r);
-    }
-
-    template <typename T, typename Event, typename = std::enable_if_t<std::is_floating_point<T>::value>>
-    bool operator() (const Event& l, T r) {
-        return event_time(l)<r;
-    }
-};
-
-namespace impl {
-    // Wrap in `impl::` namespace to obtain correct ADL for return type.
-
-    using ::arb::event_time;
-
-    template <typename Event>
-    using event_time_type = decltype(event_time(std::declval<Event>()));
-
-    template <typename Event>
-    using event_data_type = decltype(event_data(std::declval<Event>()));
-
-    template <typename Event>
-    using event_index_type = decltype(event_index(std::declval<event_data_type<Event>>()));
-}
-
-template <typename Event>
-using event_time_type = impl::event_time_type<Event>;
-
-template <typename Event>
-using event_index_type = impl::event_index_type<Event>;
-
-template <typename Event>
-using event_data_type = impl::event_data_type<Event>;
-
-template<typename Event>
-struct has_event_index : public std::false_type {};
-
-} // namespace arb
-
diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp
index 998963790b..9945e25082 100644
--- a/arbor/simulation.cpp
+++ b/arbor/simulation.cpp
@@ -5,7 +5,6 @@
 #include <arbor/context.hpp>
 #include <arbor/domain_decomposition.hpp>
 #include <arbor/export.hpp>
-#include <arbor/generic_event.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/schedule.hpp>
 #include <arbor/simple_sampler.hpp>
@@ -43,7 +42,11 @@ ARB_ARBOR_API void merge_cell_events(time_type t_from,
                                      pse_vector& new_events) {
     PE(communication:enqueue:setup);
     new_events.clear();
-    old_events = split_sorted_range(old_events, t_from, event_time_less()).second;
+    constexpr auto event_time_less = [](auto const& l, auto const& r) noexcept {
+        if constexpr (std::is_floating_point_v<std::remove_reference_t<decltype(l)>>) { return l < r.time; }
+        else { return l.time < r; }
+    };
+    old_events = split_sorted_range(old_events, t_from, event_time_less).second;
     PL();
 
     if (!generators.empty()) {
@@ -53,8 +56,8 @@ ARB_ARBOR_API void merge_cell_events(time_type t_from,
         std::vector<event_span> spanbuf;
         spanbuf.reserve(2+generators.size());
 
-        auto old_split = split_sorted_range(old_events, t_to, event_time_less());
-        auto pending_split = split_sorted_range(pending, t_to, event_time_less());
+        auto old_split = split_sorted_range(old_events, t_to, event_time_less);
+        auto pending_split = split_sorted_range(pending, t_to, event_time_less);
 
         spanbuf.push_back(old_split.first);
         spanbuf.push_back(pending_split.first);
diff --git a/test/ubench/event_binning.cpp b/test/ubench/event_binning.cpp
index 115328ca8b..e56057e340 100644
--- a/test/ubench/event_binning.cpp
+++ b/test/ubench/event_binning.cpp
@@ -11,7 +11,6 @@
 
 #include <arbor/spike_event.hpp>
 
-#include "event_queue.hpp"
 #include "backends/event.hpp"
 
 
diff --git a/arbor/event_queue.hpp b/test/ubench/event_queue.hpp
similarity index 86%
rename from arbor/event_queue.hpp
rename to test/ubench/event_queue.hpp
index 5e314f8d96..8964ee415d 100644
--- a/arbor/event_queue.hpp
+++ b/test/ubench/event_queue.hpp
@@ -10,22 +10,27 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/spike_event.hpp>
-#include <arbor/generic_event.hpp>
 
 namespace arb {
 
 /* Event classes `Event` used with `event_queue` must be move and copy constructible,
- * and either have a public field `time` that returns the time value, or provide an
- * overload of `event_time(const Event&)` which returns this value (see generic_event.hpp).
+ * and either have a public field `time` that returns the time value, or provide a
+ * projection to a time value through the EventTime functor.
  *
  * Time values must be well ordered with respect to `operator>`.
  */
 
-template <typename Event>
+struct default_event_time {
+    template<typename Event>
+    auto operator()(Event const& e) const noexcept { return e.time; }
+};
+
+template <typename Event, typename EventTime = default_event_time>
 class event_queue {
 public:
+    static constexpr EventTime event_time = {};
     using value_type = Event;
-    using event_time_type = ::arb::event_time_type<Event>;
+    using event_time_type = decltype(event_time(std::declval<Event>()));
 
     event_queue() = default;
 
@@ -50,8 +55,6 @@ class event_queue {
         if (queue_.empty()) {
             return std::nullopt;
         }
-
-        using ::arb::event_time;
         auto t = event_time(queue_.top());
         return t_until > t? std::optional(t): std::nullopt;
     }
@@ -60,7 +63,6 @@ class event_queue {
     // queue non-empty and the head satisfies predicate.
     template <typename Pred>
     std::optional<value_type> pop_if(Pred&& pred) {
-        using ::arb::event_time;
         if (!queue_.empty() && pred(queue_.top())) {
             auto ev = queue_.top();
             queue_.pop();
@@ -73,7 +75,6 @@ class event_queue {
 
     // Pop and return top event `ev` of queue if `t_until` > `event_time(ev)`.
     std::optional<value_type> pop_if_before(const event_time_type& t_until) {
-        using ::arb::event_time;
         return pop_if(
             [&t_until](const value_type& ev) { return t_until > event_time(ev); }
         );
@@ -81,7 +82,6 @@ class event_queue {
 
     // Pop and return top event `ev` of queue unless `event_time(ev)` > `t_until`
     std::optional<value_type> pop_if_not_after(const event_time_type& t_until) {
-        using ::arb::event_time;
         return pop_if(
             [&t_until](const value_type& ev) { return !(event_time(ev) > t_until); }
         );
@@ -95,7 +95,6 @@ class event_queue {
 private:
     struct event_greater {
         bool operator()(const Event& a, const Event& b) {
-            using ::arb::event_time;
             return event_time(a) > event_time(b);
         }
     };
diff --git a/test/ubench/event_setup.cpp b/test/ubench/event_setup.cpp
index 1b5ff57ced..035effc042 100644
--- a/test/ubench/event_setup.cpp
+++ b/test/ubench/event_setup.cpp
@@ -17,9 +17,10 @@
 
 #include <benchmark/benchmark.h>
 
-#include "event_queue.hpp"
 #include "backends/event.hpp"
 
+#include "event_queue.hpp"
+
 using namespace arb;
 
 std::vector<std::pair<cell_gid_type, spike_event>> generate_inputs(size_t ncells, size_t ev_per_cell) {
diff --git a/test/ubench/fvm_discretize.cpp b/test/ubench/fvm_discretize.cpp
index e78558ac29..2531a38d85 100644
--- a/test/ubench/fvm_discretize.cpp
+++ b/test/ubench/fvm_discretize.cpp
@@ -10,7 +10,6 @@
 
 #include <benchmark/benchmark.h>
 
-#include "event_queue.hpp"
 #include "fvm_layout.hpp"
 
 #ifndef DATADIR
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 070ebb783c..206f29fd40 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -72,7 +72,6 @@ set(unit_sources
     test_dry_run_context.cpp
     test_event_delivery.cpp
     test_event_generators.cpp
-    test_event_queue.cpp
     test_event_stream.cpp
     test_expected.cpp
     test_filter.cpp
diff --git a/test/unit/test_event_queue.cpp b/test/unit/test_event_queue.cpp
deleted file mode 100644
index 2020d51552..0000000000
--- a/test/unit/test_event_queue.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <cmath>
-#include <vector>
-#include <algorithm>
-
-#include <arbor/spike_event.hpp>
-
-#include "event_queue.hpp"
-
-using namespace arb;
-
-TEST(event_queue, push) {
-    using ps_event_queue = event_queue<spike_event>;
-
-    ps_event_queue q;
-
-    q.push({0u, 2.f, 2.f});
-    q.push({1u, 1.f, 2.f});
-    q.push({2u, 20.f, 2.f});
-    q.push({3u, 8.f, 2.f});
-
-    EXPECT_EQ(4u, q.size());
-
-    std::vector<float> times;
-    float maxtime(INFINITY);
-    while (!q.empty()) {
-        times.push_back(q.pop_if_before(maxtime)->time);
-    }
-
-    EXPECT_TRUE(std::is_sorted(times.begin(), times.end()));
-}
-
-TEST(event_queue, pop_if_before) {
-    using ps_event_queue = event_queue<spike_event>;
-
-    cell_lid_type target[4] = {0u, 1u, 2u, 3u};
-
-    spike_event events[] = {
-        {target[0], 1.f, 2.f},
-        {target[1], 2.f, 2.f},
-        {target[2], 3.f, 2.f},
-        {target[3], 4.f, 2.f}
-    };
-
-    ps_event_queue q;
-    for (const auto& ev: events) {
-        q.push(ev);
-    }
-
-    EXPECT_EQ(4u, q.size());
-
-    auto e1 = q.pop_if_before(0.);
-    EXPECT_FALSE(e1);
-    EXPECT_EQ(4u, q.size());
-
-    auto e2 = q.pop_if_before(5.);
-    EXPECT_TRUE(e2);
-    EXPECT_EQ(e2->target, target[0]);
-    EXPECT_EQ(3u, q.size());
-
-    auto e3 = q.pop_if_before(5.);
-    EXPECT_TRUE(e3);
-    EXPECT_EQ(e3->target, target[1]);
-    EXPECT_EQ(2u, q.size());
-
-    auto e4 = q.pop_if_before(2.5);
-    EXPECT_FALSE(e4);
-    EXPECT_EQ(2u, q.size());
-
-    auto e5 = q.pop_if_before(5.);
-    EXPECT_TRUE(e5);
-    EXPECT_EQ(e5->target, target[2]);
-    EXPECT_EQ(1u, q.size());
-
-    q.pop_if_before(5.);
-    EXPECT_EQ(0u, q.size());
-    EXPECT_TRUE(q.empty());
-
-    // empty queue should always return "false"
-    auto e6 = q.pop_if_before(100.);
-    EXPECT_FALSE(e6);
-}
-
-TEST(event_queue, pop_if_not_after) {
-    struct event {
-        int time;
-
-        event(int t): time(t) {}
-    };
-
-    event_queue<event> queue;
-
-    queue.push(1);
-    queue.push(3);
-    queue.push(5);
-
-    auto e1 = queue.pop_if_not_after(2);
-    EXPECT_TRUE(e1);
-    EXPECT_EQ(1, e1->time);
-
-    auto e2 = queue.pop_if_before(3);
-    EXPECT_FALSE(e2);
-
-    auto e3 = queue.pop_if_not_after(3);
-    EXPECT_TRUE(e3);
-    EXPECT_EQ(3, e3->time);
-
-    auto e4 = queue.pop_if_not_after(4);
-    EXPECT_FALSE(e4);
-}
-
-// Event queues can be defined for arbitrary copy-constructible events
-// for which `event_time(ev)` returns the corresponding time. Time values just
-// need to be well-ordered on '>'.
-
-struct wrapped_float {
-    wrapped_float() {}
-    wrapped_float(float f): f(f) {}
-
-    float f;
-    bool operator>(wrapped_float x) const { return f>x.f; }
-};
-
-struct minimal_event {
-    wrapped_float value;
-    explicit minimal_event(float x): value(x) {}
-};
-
-const wrapped_float& event_time(const minimal_event& ev) { return ev.value; }
-
-TEST(event_queue, minimal_event_impl) {
-    minimal_event events[] = {
-        minimal_event(3.f),
-        minimal_event(2.f),
-        minimal_event(2.f),
-        minimal_event(10.f)
-    };
-
-    std::vector<float> expected;
-    for (const auto& ev: events) {
-        expected.push_back(ev.value.f);
-    }
-    std::sort(expected.begin(), expected.end());
-
-    event_queue<minimal_event> q;
-    for (auto& ev: events) {
-        q.push(ev);
-    }
-
-    wrapped_float maxtime(INFINITY);
-
-    std::vector<float> times;
-    while (q.size()) {
-        times.push_back(q.pop_if_before(maxtime)->value.f);
-    }
-
-    EXPECT_EQ(expected, times);
-}
-
diff --git a/test/unit/test_event_stream.cpp b/test/unit/test_event_stream.cpp
index c6b53e04d3..b7f8d762ee 100644
--- a/test/unit/test_event_stream.cpp
+++ b/test/unit/test_event_stream.cpp
@@ -1,110 +1,25 @@
-#include <vector>
-#include <gtest/gtest.h>
-
-#include "timestep_range.hpp"
-#include "backends/event.hpp"
 #include "backends/multicore/event_stream.hpp"
-#include "util/rangeutil.hpp"
-
-using namespace arb;
+#include "./test_event_stream.hpp"
 
 namespace {
-    constexpr cell_local_size_type mech = 13u;
-
-    target_handle handle[4] = {
-        target_handle(mech, 0u),
-        target_handle(mech, 1u),
-        target_handle(mech, 4u),
-        target_handle(mech, 2u)
-    };
 
-    std::vector<deliverable_event> common_events = {
-        deliverable_event(2.0, handle[1], 2.f),
-        deliverable_event(3.0, handle[3], 4.f),
-        deliverable_event(3.0, handle[0], 1.f),
-        deliverable_event(5.0, handle[2], 3.f),
-        deliverable_event(5.5, handle[2], 6.f)
-    };
-
-    bool event_matches(const arb_deliverable_event_data& e, unsigned i) {
-        const auto& expected = common_events[i];
-        return (e.weight == expected.weight);
+template<typename Result>
+void check(Result result) {
+    for (std::size_t step=0; step<result.steps.size(); ++step) {
+        for (auto& [mech_id, stream] :  result.streams) {
+            stream.mark();
+            auto marked = stream.marked_events();
+            check_result(marked.begin, result.expected[mech_id][step]);
+        }
     }
 }
 
-TEST(event_stream, mark) {
-    using event_stream = multicore::event_stream<deliverable_event>;
-
-    event_stream m;
-
-    arb_deliverable_event_stream s;
-
-    timestep_range dts{0, 6, 1.0};
-    EXPECT_EQ(dts.size(), 6u);
-
-    std::vector<std::vector<deliverable_event>> events(dts.size());
-    for (const auto& ev : common_events) {
-        events[dts.find(event_time(ev))-dts.begin()].push_back(ev);
-    }
-    arb_assert(util::sum_by(events, [] (const auto& v) {return v.size();}) == common_events.size());
-
-    m.init(events);
-
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
-
-    m.mark();
-    // current time is 0: no events
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
-
-    m.mark();
-    // current time is 1: no events
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
-
-    m.mark();
-    // current time is 2: 1 event at mech_index 1
-    EXPECT_FALSE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 1u);
-    EXPECT_TRUE(event_matches(s.begin[0], 0u));
-
-    m.mark();
-    // current time is 3: 2 events at mech_index 0 and 2
-    EXPECT_FALSE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 2u);
-    // the order of these 2 events will be inverted on GPU due to sorting
-    EXPECT_TRUE(event_matches(s.begin[0], 1u));
-    EXPECT_TRUE(event_matches(s.begin[1], 2u));
-
-    m.mark();
-    // current time is 4: no events
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
-
-    m.mark();
-    // current time is 5: 2 events at mech_index 4
-    EXPECT_FALSE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 2u);
-    EXPECT_TRUE(event_matches(s.begin[0], 3u));
-    EXPECT_TRUE(event_matches(s.begin[1], 4u));
+}
 
-    m.mark();
-    // current time is past time range
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
+TEST(event_stream, single_step) {
+    check(single_step<multicore::spike_event_stream>());
+}
 
-    m.clear();
-    // no events after clear
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
+TEST(event_stream, multi_step) {
+    check(multi_step<multicore::spike_event_stream>());
 }
diff --git a/test/unit/test_event_stream.hpp b/test/unit/test_event_stream.hpp
new file mode 100644
index 0000000000..412e5f7239
--- /dev/null
+++ b/test/unit/test_event_stream.hpp
@@ -0,0 +1,212 @@
+#pragma once
+
+#include <vector>
+#include <random>
+#include <gtest/gtest.h>
+
+#include "timestep_range.hpp"
+#include "backends/event.hpp"
+#include "util/rangeutil.hpp"
+
+namespace {
+
+using namespace arb;
+
+void check_result(arb_deliverable_event_data const* results, std::vector<arb_deliverable_event_data> const& expected) {
+    for (std::size_t i=0; i<expected.size(); ++i) {
+        EXPECT_EQ(results[i].weight, expected[i].weight);
+    }
+}
+
+template<typename Stream>
+struct result {
+    timestep_range steps;
+    std::unordered_map<unsigned, Stream> streams;
+    std::unordered_map<unsigned, std::vector<std::vector<arb_deliverable_event_data>>> expected;
+};
+
+template<typename Stream>
+result<Stream> single_step() {
+    // events for 3 cells and 2 mechanisms and according targets
+    //
+    // target handles                            | events
+    // ===================================================================
+    // target  cell  div mech_id  mech_index lid | t=[0,1)
+    // ===================================================================
+    // 0       0     0   0        0          0   | e@t=0.0,w=0.0
+    // -------------------------------------------------------------------
+    // 1       0         1        0          1   |   e@t=0.1,w=1.0
+    // ===================================================================
+    // 2       1     2   0        0          0   |           e@t=0.5,w=0.2
+    // 3       1         0        1          1   |   e@t=0.1,w=0.3
+    // -------------------------------------------------------------------
+    // 4       1         1        0          2   |         e@t=0.4,w=1.2
+    // ===================================================================
+    // 5       3     5   0        0          0   |         e@t=0.4,w=0.1
+    // 6       3         0        1          1   |
+    // -------------------------------------------------------------------
+    // 7       3         1        0          2   |       e@t=0.3,w=1.1
+    // 8       3         1        1          3   |
+    // 9       3         1        2          4   |       e@t=0.3,w=1.3
+    // ===================================================================
+    //               10
+
+    const std::vector<target_handle> handles = {
+        {0, 0},
+        {1, 0},
+        {0, 0},
+        {0, 1},
+        {1, 0},
+        {0, 0},
+        {0, 1},
+        {1, 0},
+        {1, 1},
+        {1, 2}
+    };
+
+    const std::vector<std::size_t> divs = {0, 2, 5, 10};
+
+    std::vector<std::vector<spike_event>> events = {
+        {{0, 0.0, 0.0f}, {1, 0.0, 1.0f}},
+        {{1, 0.1, 0.3f}, {2, 0.4, 1.2f}, {0, 0.5, 0.2f}},
+        {{2, 0.3, 1.1f}, {4, 0.3, 1.3f}, {0, 0.4, 0.1f}}
+    };
+
+    // prepare return value
+    result<Stream> res {
+        timestep_range{0,1,1},
+        {{0u, Stream{}}, {1u, Stream{}}},
+        {}
+    };
+
+    // expected outcome: one stream per mechanism, events ordered
+    res.expected[0u] = std::vector<std::vector<arb_deliverable_event_data>>{
+        { {0, 0.0f}, {0, 0.1f}, {0, 0.2f}, {1, 0.3f} } };
+    res.expected[1u] = std::vector<std::vector<arb_deliverable_event_data>>{
+        { {0, 1.0f}, {0, 1.1f}, {0, 1.2f}, {2, 1.3f} } };
+
+    // initialize event streams
+    auto lanes = util::subrange_view(events, 0u, events.size());
+    initialize(lanes, handles, divs, res.steps, res.streams);
+
+    return res;
+}
+
+template<typename Stream>
+result<Stream> multi_step() {
+    // number of events, cells, mechanisms and targets
+    std::size_t num_events = 500;
+    std::size_t num_cells = 20;
+    std::size_t num_mechanisms = 5;
+    std::size_t num_targets_per_mechanism_and_cell = 8;
+    std::size_t num_steps = 200;
+    double end_time = 1.0;
+
+    result<Stream> res {
+        timestep_range(0.0, end_time, end_time/num_steps),
+        {},
+        {}
+    };
+    for (std::size_t mech_id=0; mech_id<num_mechanisms; ++mech_id)
+        res.streams[mech_id] = Stream{};
+
+    // compute handles and divs
+    std::vector<std::size_t> divs(num_cells+1, 0u);
+    std::vector<target_handle> handles;
+    handles.reserve(num_cells*num_mechanisms*num_targets_per_mechanism_and_cell);
+    for (std::size_t cell=0; cell<num_cells; ++cell) {
+        for (std::size_t mech_id=0; mech_id<num_mechanisms; ++mech_id) {
+            for (std::size_t mech_index=0; mech_index<num_targets_per_mechanism_and_cell; ++mech_index) {
+                handles.emplace_back(static_cast<cell_local_size_type>(mech_id), static_cast<cell_local_size_type>(mech_index));
+            }
+        }
+        divs[cell+1] = divs[cell] + num_mechanisms*num_targets_per_mechanism_and_cell;
+    }
+
+    // events are binned by cell
+    std::vector<std::vector<spike_event>> events(num_cells);
+
+    // generate random events
+    std::mt19937 gen(42);
+    std::uniform_int_distribution<> cell_dist(0, num_cells-1);
+    std::uniform_int_distribution<> mech_id_dist(0, num_mechanisms-1);
+    std::uniform_int_distribution<> mech_index_dist(0, num_targets_per_mechanism_and_cell-1);
+    std::uniform_real_distribution<> time_dist(0.0, end_time);
+    for (std::size_t i=0; i<num_events; ++i) {
+        auto cell = cell_dist(gen);
+        auto mech_id = mech_id_dist(gen);
+        auto mech_index = mech_index_dist(gen);
+        auto target = mech_id*num_targets_per_mechanism_and_cell + mech_index;
+        auto time = time_dist(gen);
+        auto weight = 0.0f;
+        events[cell].emplace_back(static_cast<cell_lid_type>(target), time, weight);
+    }
+
+    // sort events by time
+    for (auto& v : events) {
+        std::stable_sort(v.begin(), v.end(), [](auto const& l, auto const& r) { return l.time < r.time; });
+    }
+
+    // compute expected order as permutation of a pair which indexes into events:
+    // first index is cell id, second index is item index
+    std::vector<std::pair<std::size_t, std::size_t>> expected_order;
+    expected_order.reserve(num_events);
+    for (std::size_t cell=0; cell<num_cells; ++cell) {
+        auto const& evts = events[cell];
+        for (std::size_t i=0; i<evts.size(); ++i) {
+            expected_order.push_back(std::make_pair(cell, i));
+        }
+    }
+    std::sort(expected_order.begin(), expected_order.end(), [&](auto const& l, auto const& r) {
+        auto [l_cell, l_idx] = l;
+        auto [r_cell, r_idx] = r;
+        auto const& l_event = events[l_cell][l_idx];
+        auto const& r_event = events[r_cell][r_idx];
+        auto l_t0 = res.steps.find(l_event.time)->t_begin();
+        auto r_t0 = res.steps.find(r_event.time)->t_begin();
+        auto const& l_handle = handles[divs[l_cell] + l_event.target];
+        auto const& r_handle = handles[divs[r_cell] + r_event.target];
+        auto l_mech_id = l_handle.mech_id;
+        auto r_mech_id = r_handle.mech_id;
+        auto l_mech_index = l_handle.mech_index;
+        auto r_mech_index = r_handle.mech_index;
+
+        // sort by mech_id
+        if (l_mech_id < r_mech_id) return true;
+        if (l_mech_id > r_mech_id) return false;
+        // if same mech_id, sort by step
+        if (l_t0 < r_t0) return true;
+        if (l_t0 > r_t0) return false;
+        // if same step, sort by mech_index
+        if (l_mech_index < r_mech_index) return true;
+        if (l_mech_index > r_mech_index) return false;
+        // if same mech_index sort by time
+        return l_event.time < r_event.time;
+    });
+
+    // expected results are now mapped by mechanism id -> vector of vector of deliverable event data
+    // the outer vector represents time step bins, the inner vector the ordered stream of events
+    for (std::size_t mech_id=0; mech_id<num_mechanisms; ++mech_id) {
+        res.expected[mech_id] = std::vector<std::vector<arb_deliverable_event_data>>(num_steps);
+    }
+
+    // create expected results from the previously defined expected order and choose unique event weight
+    std::size_t cc=0;
+    for (auto [cell, idx] : expected_order) {
+        auto& event = events[cell][idx];
+        auto step = res.steps.find(event.time) - res.steps.begin();
+        auto const& handle = handles[divs[cell] + event.target];
+        auto mech_id = handle.mech_id;
+        auto mech_index = handle.mech_index;
+        event.weight = cc++;
+        res.expected[mech_id][step].push_back(arb_deliverable_event_data{mech_index, event.weight});
+    }
+
+    // initialize event streams
+    auto lanes = util::subrange_view(events, 0u, events.size());
+    initialize(lanes, handles, divs, res.steps, res.streams);
+
+    return res;
+}
+
+}
diff --git a/test/unit/test_event_stream_gpu.cpp b/test/unit/test_event_stream_gpu.cpp
index 4b548f91b7..56b9c44a9a 100644
--- a/test/unit/test_event_stream_gpu.cpp
+++ b/test/unit/test_event_stream_gpu.cpp
@@ -1,125 +1,38 @@
-#include <vector>
-#include <gtest/gtest.h>
-
-#include "timestep_range.hpp"
-#include "backends/event.hpp"
 #include "backends/gpu/event_stream.hpp"
 #include "memory/memory.hpp"
 #include "memory/gpu_wrappers.hpp"
 #include "util/rangeutil.hpp"
-
-using namespace arb;
+#include "./test_event_stream.hpp"
 
 namespace {
-    constexpr cell_local_size_type mech = 13u;
-
-    target_handle handle[4] = {
-        target_handle(mech, 0u),
-        target_handle(mech, 1u),
-        target_handle(mech, 4u),
-        target_handle(mech, 2u)
-    };
-
-    std::vector<deliverable_event> common_events = {
-        deliverable_event(2.0, handle[1], 2.f),
-        deliverable_event(3.0, handle[3], 4.f),
-        deliverable_event(3.0, handle[0], 1.f),
-        deliverable_event(5.0, handle[2], 3.f),
-        deliverable_event(5.5, handle[2], 6.f)
-    };
-
-    bool event_matches(const arb_deliverable_event_data& e, unsigned i) {
-        const auto& expected = common_events[i];
-        return (e.weight == expected.weight);
-    }
 
-    template<typename T>
-    void cpy_d2h(T* dst, const T* src) {
-        memory::gpu_memcpy_d2h(dst, src, sizeof(T));
-    }
+template<typename T>
+void cpy_d2h(T* dst, const T* src, std::size_t n) {
+    memory::gpu_memcpy_d2h(dst, src, sizeof(T)*n);
 }
 
-TEST(event_stream_gpu, mark) {
-    using event_stream = gpu::event_stream<deliverable_event>;
-
-    auto thread_pool = std::make_shared<arb::threading::task_system>();
-
-    event_stream m(thread_pool);
-
-    arb_deliverable_event_stream s;
-    arb_deliverable_event_data d;
-
-    timestep_range dts{0, 6, 1.0};
-    EXPECT_EQ(dts.size(), 6u);
-
-    std::vector<std::vector<deliverable_event>> events(dts.size());
-    for (const auto& ev : common_events) {
-        events[dts.find(event_time(ev))-dts.begin()].push_back(ev);
+template<typename Result>
+void check(Result result) {
+    for (std::size_t step=0; step<result.steps.size(); ++step) {
+        for (auto& [mech_id, stream] :  result.streams) {
+            stream.mark();
+            auto marked = stream.marked_events();
+            std::vector<arb_deliverable_event_data> host_data(marked.end - marked.begin);
+            EXPECT_EQ(host_data.size(), result.expected[mech_id][step].size());
+            if (host_data.size()) {
+                cpy_d2h(host_data.data(), marked.begin, host_data.size());
+                check_result(host_data.data(), result.expected[mech_id][step]);
+            }
+        }
     }
-    arb_assert(util::sum_by(events, [] (const auto& v) {return v.size();}) == common_events.size());
-
-    m.init(events);
-
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
-
-    m.mark();
-    // current time is 0: no events
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
-
-    m.mark();
-    // current time is 1: no events
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
-
-    m.mark();
-    // current time is 2: 1 event at mech_index 1
-    EXPECT_FALSE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 1u);
-    cpy_d2h(&d, s.begin+0);
-    EXPECT_TRUE(event_matches(d, 0u));
-
-    m.mark();
-    // current time is 3: 2 events at mech_index 0 and 2
-    EXPECT_FALSE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 2u);
-    // the order of these 2 events is inverted on GPU due to sorting
-    cpy_d2h(&d, s.begin+0);
-    EXPECT_TRUE(event_matches(d, 2u));
-    cpy_d2h(&d, s.begin+1);
-    EXPECT_TRUE(event_matches(d, 1u));
-
-    m.mark();
-    // current time is 4: no events
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
+}
 
-    m.mark();
-    // current time is 5: 2 events at mech_index 4
-    EXPECT_FALSE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 2u);
-    cpy_d2h(&d, s.begin+0);
-    EXPECT_TRUE(event_matches(d, 3u));
-    cpy_d2h(&d, s.begin+1);
-    EXPECT_TRUE(event_matches(d, 4u));
+}
 
-    m.mark();
-    // current time is past time range
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
+TEST(event_stream_gpu, single_step) {
+    check(single_step<gpu::spike_event_stream>());
+}
 
-    m.clear();
-    // no events after clear
-    EXPECT_TRUE(m.empty());
-    s = m.marked_events();
-    EXPECT_EQ(s.end - s.begin, 0u);
+TEST(event_stream_gpu, multi_step) {
+    check(multi_step<gpu::spike_event_stream>());
 }