From b3dd7172618e935dedb5bc8c266239e99f1bb048 Mon Sep 17 00:00:00 2001 From: Julian Harbarth Date: Thu, 11 Nov 2021 18:05:39 +0100 Subject: [PATCH] RAPTOR (#28) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add MOTIS_GET_TIMING * fix warning * remove unused timing var * initial raptor module * some clang tidy fixes * clang tidy fixes * fix windows ci * add destination meta station capabilites to raptor reconstructor * fix domination for pretrip * remove debug output * add max travel duration * fix ci * remove debug info * add contains util * add print raptor route from station evas * add source meta station capabilities * add departure events for start meta stations * fix source meta station for pretrip * fix valid check for departure times in reconstructor * remove unused * adjust reconstructor for start metas * fix earliest arrival update * remove already completed todo * initial graptor * clean up * add print_footpaths for debuggin * remove unnecessary files * fix reconstructor * remove unused * remove streams from devices * add multiprocessor per query config * add memory store wip * remove utils * add debug util * clean up gpu raptor * finish memory store * refactor * delete devices * fix include * fix warning * remove global timetable on device * fix switchup * fix warnings * make memory store safer * switch to c++17 * rename cc to cuda_check to avoid name conflict * fix ub * fix ci * fix enter/exit flags in journey * fix journey reconstruction * fix enter/exit flags in journey * fix max queries per device for negative values * fix timekeeping in cpu raptor * raptor: update cmake file * raptor: refactor * raptor: remove for_each * raptor: append_vector -> utl::concat * update utl * raptor: fix build * raptor: get_raptor_schedule refactor move intermediate data structures to cc file * csa: rename gpucsa to gpu-csa * raptor: refactor (remove unused code) * refactor gpu raptor * add additional starts to gpu raptor * split gpu raptor into multiple files * add ontrip flag to raptor query * raptor: fix ub in departure range calculation * initialize ontrip flag * add gpu workflow * gpu ci: run tests, save deps * gpu: disable avx * always move * move code to .cc files * tidy up includes * remove unnecessary include * add routing test, fix bug in raptor * only compile if MOTIS_CUDA * check journey equal * refactor raptor module according to the pimpl idiom * remove designated initializers * fix format * fix raptor module destructor * fix raptor module destructor * fix segfault * remove unused includes * raptor: remove backward code, simplify departure track computation * add raptor print test * tuple -> pair * remove unused includes * rename raptor_schedule to raptor_meta_info * fix remaining renames * routing itest -> --tripbased.use_data_file=false * fix clang tidy errors * Merge branch 'raptor' of github.com:motis-project/motis into raptor * check formatting of cuda files * refactor * refactor * reorganize includes * always set footpath length to maximum of transfertimes or duration * add minct to set transfer time to 1 Co-authored-by: Felix Gündling --- .github/workflows/gpu.yml | 56 ++ .github/workflows/unix.yml | 13 +- .pkg | 2 +- .pkg.lock | 4 +- CMakeLists.txt | 16 +- base/core/CMakeLists.txt | 2 +- base/core/include/motis/core/common/timing.h | 14 +- .../core/include/motis/core/journey/journey.h | 12 +- .../include/motis/core/schedule/attribute.h | 5 +- base/core/src/print_journey.cc | 9 + base/loader/src/build_footpaths.cc | 6 +- base/loader/src/build_stations.cc | 2 +- modules/csa/CMakeLists.txt | 18 +- modules/csa/src/gpu/gpu_csa.cu | 36 +- modules/raptor/CMakeLists.txt | 36 ++ .../include/motis/raptor/additional_start.h | 23 + .../include/motis/raptor/cpu/cpu_raptor.h | 30 ++ .../include/motis/raptor/cpu/mark_store.h | 21 + .../motis/raptor/get_raptor_timetable.h | 11 + .../include/motis/raptor/gpu/cuda_util.h | 32 ++ .../include/motis/raptor/gpu/gpu_raptor.cuh | 50 ++ .../motis/raptor/gpu/gpu_timetable.cuh | 82 +++ .../include/motis/raptor/gpu/memory_store.h | 141 +++++ .../include/motis/raptor/print_raptor.h | 180 +++++++ modules/raptor/include/motis/raptor/raptor.h | 39 ++ .../include/motis/raptor/raptor_query.h | 89 ++++ .../include/motis/raptor/raptor_result.h | 85 +++ .../include/motis/raptor/raptor_search.h | 109 ++++ .../include/motis/raptor/raptor_statistics.h | 29 + .../include/motis/raptor/raptor_timetable.h | 191 +++++++ .../raptor/include/motis/raptor/raptor_util.h | 19 + .../include/motis/raptor/reconstructor.h | 495 ++++++++++++++++++ modules/raptor/src/additional_start.cc | 61 +++ modules/raptor/src/cpu/cpu_raptor.cc | 207 ++++++++ modules/raptor/src/cpu/mark_store.cc | 17 + modules/raptor/src/get_raptor_timetable.cc | 416 +++++++++++++++ modules/raptor/src/gpu/gpu_raptor.cu | 482 +++++++++++++++++ modules/raptor/src/gpu/gpu_timetable.cu | 96 ++++ modules/raptor/src/gpu/hybrid_raptor.cu | 68 +++ modules/raptor/src/gpu/memory_store.cc | 180 +++++++ modules/raptor/src/raptor.cc | 163 ++++++ modules/raptor/src/raptor_query.cc | 82 +++ modules/raptor/src/raptor_timetable.cc | 10 + modules/raptor/test/raptor_print_test.cc | 41 ++ modules/tripbased/src/tripbased.cc | 3 - .../stamm/infotext.101 | 2 + .../stamm/minct.csv | 3 + .../simple_realtime/stamm/infotext.101 | 4 + .../simple_realtime/stamm/metabhf.101 | 7 + test/schedule/simple_realtime/stamm/minct.csv | 3 + test/src/routing_test.cc | 55 ++ 51 files changed, 3700 insertions(+), 57 deletions(-) create mode 100644 .github/workflows/gpu.yml create mode 100644 modules/raptor/CMakeLists.txt create mode 100644 modules/raptor/include/motis/raptor/additional_start.h create mode 100644 modules/raptor/include/motis/raptor/cpu/cpu_raptor.h create mode 100644 modules/raptor/include/motis/raptor/cpu/mark_store.h create mode 100644 modules/raptor/include/motis/raptor/get_raptor_timetable.h create mode 100644 modules/raptor/include/motis/raptor/gpu/cuda_util.h create mode 100644 modules/raptor/include/motis/raptor/gpu/gpu_raptor.cuh create mode 100644 modules/raptor/include/motis/raptor/gpu/gpu_timetable.cuh create mode 100644 modules/raptor/include/motis/raptor/gpu/memory_store.h create mode 100644 modules/raptor/include/motis/raptor/print_raptor.h create mode 100644 modules/raptor/include/motis/raptor/raptor.h create mode 100644 modules/raptor/include/motis/raptor/raptor_query.h create mode 100644 modules/raptor/include/motis/raptor/raptor_result.h create mode 100644 modules/raptor/include/motis/raptor/raptor_search.h create mode 100644 modules/raptor/include/motis/raptor/raptor_statistics.h create mode 100644 modules/raptor/include/motis/raptor/raptor_timetable.h create mode 100644 modules/raptor/include/motis/raptor/raptor_util.h create mode 100644 modules/raptor/include/motis/raptor/reconstructor.h create mode 100644 modules/raptor/src/additional_start.cc create mode 100644 modules/raptor/src/cpu/cpu_raptor.cc create mode 100644 modules/raptor/src/cpu/mark_store.cc create mode 100644 modules/raptor/src/get_raptor_timetable.cc create mode 100644 modules/raptor/src/gpu/gpu_raptor.cu create mode 100644 modules/raptor/src/gpu/gpu_timetable.cu create mode 100644 modules/raptor/src/gpu/hybrid_raptor.cu create mode 100644 modules/raptor/src/gpu/memory_store.cc create mode 100644 modules/raptor/src/raptor.cc create mode 100644 modules/raptor/src/raptor_query.cc create mode 100644 modules/raptor/src/raptor_timetable.cc create mode 100644 modules/raptor/test/raptor_print_test.cc create mode 100644 test/src/routing_test.cc diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml new file mode 100644 index 000000000..85c9b6b90 --- /dev/null +++ b/.github/workflows/gpu.yml @@ -0,0 +1,56 @@ +name: Linux GPU Build + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + release: + types: + - published + +jobs: + build: + runs-on: [self-hosted, linux, x64, gpu] + env: + DEBIAN_FRONTEND: noninteractive + BUILDCACHE_COMPRESS: true + BUILDCACHE_DIRECT_MODE: true + BUILDCACHE_ACCURACY: SLOPPY + UBSAN_OPTIONS: halt_on_error=1:abort_on_error=1 + CUDACXX: /usr/local/cuda/bin/nvcc + steps: + - uses: actions/checkout@v2 + + - name: Get deps + run: mkdir -p ~/deps && mv ~/deps . + + - name: CMake + run: | + cmake ${{ matrix.config.cross }} \ + -G Ninja -S . -B build \ + -DCMAKE_C_COMPILER=gcc-10 \ + -DCMAKE_CXX_COMPILER=g++-10 \ + -DCMAKE_BUILD_TYPE=Release \ + -DMOTIS_CUDA=On \ + -DMOTIS_AVX=Off \ + -DMOTIS_AVX2=Off \ + -DMOTIS_WITH_WEBUI=${{ matrix.config.webui }} + + - name: Build + run: | + ./build/buildcache/bin/buildcache -z + cmake --build build --target \ + motis \ + motis-test \ + motis-itest + ./build/buildcache/bin/buildcache -s + + - name: Save deps + run: mv deps ~ + + - name: Run Tests + run: ${{ matrix.config.emulator }} ./build/motis-test + + - name: Run Integration Tests + run: ${{ matrix.config.emulator }} ./build/motis-itest \ No newline at end of file diff --git a/.github/workflows/unix.yml b/.github/workflows/unix.yml index 851234496..cb6e27448 100644 --- a/.github/workflows/unix.yml +++ b/.github/workflows/unix.yml @@ -2,7 +2,7 @@ name: Unix Build on: schedule: - - cron: '0 3 * * 2,6' + - cron: '0 3 * * 2,6' push: branches: [ master ] pull_request: @@ -28,7 +28,10 @@ jobs: sudo apt-get install -y --no-install-recommends clang-format-12 - name: Format files - run: find base libs modules test -type f -a \( -name "*.cc" -o -name "*.h" \) -print0 | xargs -0 clang-format-12 -i + run: | + find base libs modules test \ + -type f -a \( -name "*.cc" -o -name "*.h" -o -name ".cuh" -o -name ".cu" \) \ + -print0 | xargs -0 clang-format-12 -i - name: Check for differences run: | @@ -228,9 +231,9 @@ jobs: path: ${{ github.workspace }}/.buildcache key: buildcache-${{ matrix.config.name }}-${{ hashFiles('.pkg') }}-${{ hashFiles('**/*.h') }}-${{ hashFiles('**/*.cc') }} restore-keys: | - buildcache-${{ matrix.config.name }}-${{ hashFiles('.pkg') }}-${{ hashFiles('**/*.h') }}- - buildcache-${{ matrix.config.name }}-${{ hashFiles('.pkg') }}- - buildcache-${{ matrix.config.name }}- + buildcache-${{ matrix.config.name }}-${{ hashFiles('.pkg') }}-${{ hashFiles('**/*.h') }}- + buildcache-${{ matrix.config.name }}-${{ hashFiles('.pkg') }}- + buildcache-${{ matrix.config.name }}- - name: Dependencies Cache uses: actions/cache@v2 diff --git a/.pkg b/.pkg index 4e65e0edb..3b7d1046b 100644 --- a/.pkg +++ b/.pkg @@ -77,7 +77,7 @@ [utl] url=git@github.com:motis-project/utl.git branch=master - commit=c7be2bfe0b81702078b3aaf9c620c658d8d1f658 + commit=a2375f6e2b20b6754335649857923e8b4a149ed9 [guess] url=git@github.com:motis-project/guess.git branch=master diff --git a/.pkg.lock b/.pkg.lock index b085fd258..3888a1f0a 100644 --- a/.pkg.lock +++ b/.pkg.lock @@ -1,4 +1,4 @@ -14695981039346656037 +5276275784285909422 cista eb1b0199eef401493db0d2dd735a14481b823083 zlib 1e1dfdedddb54a2e2cb8fec3b67f925233c495aa boost bca212ca286121db1e7eb2efa9ee8de626633481 @@ -10,7 +10,7 @@ libosmium e35f4f63facbc87a0a5bf388bce19e6c4ed1dca7 protozero 8c9f3fa97c2cfdceef86d0b61818ae98e9328f29 Catch2 e5c9a58d66ff0780e956b5447573af9d6b9b2ca3 fmt c68ab4be8f3cb0e5c6eb181b3f419622e15e02bd -utl c7be2bfe0b81702078b3aaf9c620c658d8d1f658 +utl a2375f6e2b20b6754335649857923e8b4a149ed9 address-typeahead 9b33a191c05ee3f489492ea9a89253eaa26b91d4 conf 4c809244b10de534f4423ce4b27aa1a371610e96 deboost.context e586729432e76f97eccb2cb75d8287cab47198f5 diff --git a/CMakeLists.txt b/CMakeLists.txt index 894052060..22e10c0a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,23 +40,20 @@ set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") option(MOTIS_MIMALLOC "use mimalloc" OFF) option(MOTIS_AVX "enable AVX functions" ON) option(MOTIS_AVX2 "enable AVX2 + FMA functions" ON) - option(MOTIS_CUDA "enable CUDA functions" OFF) +option(MOTIS_LINT "enable lint (clang-tidy) target" OFF) +option(MOTIS_COV "enable coverage (coverage) target" OFF) +option(MOTIS_WITH_WEBUI "enable motis-web-ui target" OFF) + if (MOTIS_CUDA) - set(MOTIS_CUDA_ARCH "-gencode arch=compute_75,code=sm_75 -gencode arch=compute_61,code=sm_61") if (MSVC) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MOTIS_CUDA_ARCH} -lcudadevrt") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lcudadevrt") else() - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MOTIS_CUDA_ARCH} --compiler-options -static") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options -static") endif() enable_language(CUDA) endif() -option(MOTIS_LINT "enable lint (clang-tidy) target" OFF) -option(MOTIS_COV "enable coverage (coverage) target" OFF) - -option(MOTIS_WITH_WEBUI "enable motis-web-ui target" OFF) - add_library(motis-generated INTERFACE) target_include_directories(motis-generated INTERFACE ${CMAKE_BINARY_DIR}/generated) @@ -238,7 +235,6 @@ endif() target_compile_options(motis-test PRIVATE ${MOTIS_CXX_FLAGS}) target_compile_definitions(motis-test PRIVATE ${MOTIS_COMPILE_DEFINITIONS}) target_link_libraries(motis-test - ${module-targets} ${motis-test-extra-dependencies} motis-bootstrap motis-core diff --git a/base/core/CMakeLists.txt b/base/core/CMakeLists.txt index 294efa0ba..95241d344 100644 --- a/base/core/CMakeLists.txt +++ b/base/core/CMakeLists.txt @@ -6,7 +6,7 @@ add_library(motis-core STATIC ${motis-core-files}) target_compile_features(motis-core PUBLIC cxx_std_17) target_include_directories(motis-core PUBLIC include) target_compile_definitions(motis-core PUBLIC ${MOTIS_COMPILE_DEFINITIONS}) -target_compile_options(motis-core PUBLIC ${MOTIS_CXX_FLAGS}) +target_compile_options(motis-core PRIVATE ${MOTIS_CXX_FLAGS}) target_link_libraries(motis-core utl date diff --git a/base/core/include/motis/core/common/timing.h b/base/core/include/motis/core/common/timing.h index 0a95eee07..965f8c218 100644 --- a/base/core/include/motis/core/common/timing.h +++ b/base/core/include/motis/core/common/timing.h @@ -3,8 +3,8 @@ #include #define MOTIS_START_TIMING(_X) \ - auto _X##_start = std::chrono::steady_clock::now(), _X##_stop = _X##_start -#define MOTIS_STOP_TIMING(_X) _X##_stop = std::chrono::steady_clock::now() + auto _X##_start = std::chrono::steady_clock::now() +#define MOTIS_STOP_TIMING(_X) auto _X##_stop = std::chrono::steady_clock::now() #define MOTIS_TIMING_MS(_X) \ (std::chrono::duration_cast(_X##_stop - \ _X##_start) \ @@ -13,3 +13,13 @@ (std::chrono::duration_cast(_X##_stop - \ _X##_start) \ .count()) + +#define MOTIS_GET_TIMING_MS(_X) \ + (std::chrono::duration_cast( \ + std::chrono::steady_clock::now() - _X##_start) \ + .count()) + +#define MOTIS_GET_TIMING_US(_X) \ + (std::chrono::duration_cast( \ + std::chrono::steady_clock::now() - _X##_start) \ + .count()) diff --git a/base/core/include/motis/core/journey/journey.h b/base/core/include/motis/core/journey/journey.h index 76e47adf2..d15f240b0 100644 --- a/base/core/include/motis/core/journey/journey.h +++ b/base/core/include/motis/core/journey/journey.h @@ -4,6 +4,8 @@ #include #include +#include "cista/reflection/comparable.h" + #include "motis/core/schedule/attribute.h" #include "motis/core/schedule/free_text.h" #include "motis/core/schedule/timestamp_reason.h" @@ -12,6 +14,8 @@ namespace motis { struct journey { + CISTA_COMPARABLE() + enum class connection_status : uint8_t { OK, INTERCHANGE_INVALID, @@ -26,6 +30,7 @@ struct journey { }; struct transport { + CISTA_COMPARABLE() unsigned from_{0}, to_{0}; bool is_walk_{false}; std::string name_; @@ -44,18 +49,20 @@ struct journey { }; struct trip { + CISTA_COMPARABLE() unsigned from_{0}, to_{0}; extern_trip extern_trip_; std::string debug_; }; struct stop { + CISTA_COMPARABLE() bool exit_{false}, enter_{false}; std::string name_; std::string eva_no_; double lat_{0}, lng_{0}; - struct event_info { + CISTA_COMPARABLE() bool valid_{false}; unixtime timestamp_{0}; unixtime schedule_timestamp_{0}; @@ -66,16 +73,19 @@ struct journey { }; struct ranged_attribute { + CISTA_COMPARABLE() unsigned from_{0}, to_{0}; attribute attr_; }; struct ranged_free_text { + CISTA_COMPARABLE() unsigned from_{0}, to_{0}; free_text text_; }; struct problem { + CISTA_COMPARABLE() problem_type type_{problem_type::NO_PROBLEM}; unsigned from_{0}, to_{0}; }; diff --git a/base/core/include/motis/core/schedule/attribute.h b/base/core/include/motis/core/schedule/attribute.h index b016c6a2c..42460fab6 100644 --- a/base/core/include/motis/core/schedule/attribute.h +++ b/base/core/include/motis/core/schedule/attribute.h @@ -1,12 +1,15 @@ #pragma once +#include "cista/reflection/comparable.h" + #include "motis/string.h" namespace motis { struct attribute { - mcd::string text_; + CISTA_COMPARABLE() mcd::string code_; + mcd::string text_; }; } // namespace motis diff --git a/base/core/src/print_journey.cc b/base/core/src/print_journey.cc index 50b9ce85e..02bdf6eee 100644 --- a/base/core/src/print_journey.cc +++ b/base/core/src/print_journey.cc @@ -173,6 +173,15 @@ bool print_journey(journey const& j, std::ostream& out, bool local_time, << trp.line_id_ << " " << j.trips_[i].debug_ << std::endl; } + out << "\nAttributes:" << std::endl; + for (auto i = 0UL; i < j.attributes_.size(); ++i) { + auto const& attribute = j.attributes_[i]; + out << std::right << std::setw(2) << i << ": " << std::left << std::setw(2) + << attribute.from_ << " -> " << std::left << std::setw(2) + << attribute.to_ << " {" << attribute.attr_.code_ << " " + << attribute.attr_.text_ << "}" << std::endl; + } + auto const report_error = [&](bool first_error) -> std::ostream& { if (first_error) { out << "\nWARNING: Journey is broken:" << std::endl; diff --git a/base/loader/src/build_footpaths.cc b/base/loader/src/build_footpaths.cc index 53ff70928..8b88af562 100644 --- a/base/loader/src/build_footpaths.cc +++ b/base/loader/src/build_footpaths.cc @@ -58,11 +58,13 @@ struct footpath_builder { continue; } - auto duration = static_cast(footpath->duration()); auto const from_node = get_station("from", footpath->from()); auto const to_node = get_station("to", footpath->to()); auto& from_station = sched_.stations_.at(from_node->id_); auto& to_station = sched_.stations_.at(to_node->id_); + auto duration = + std::max({from_station->transfer_time_, to_station->transfer_time_, + static_cast(footpath->duration())}); if (from_node == to_node) { LOG(ml::warn) << "Footpath loop at station " << from_station->eva_nr_ @@ -71,8 +73,6 @@ struct footpath_builder { } if (opt_.adjust_footpaths_) { - duration = std::max({from_station->transfer_time_, - to_station->transfer_time_, duration}); auto const distance = get_distance(*from_station, *to_station) * 1000; auto adjusted_duration = adjust_footpath_duration(duration, distance); diff --git a/base/loader/src/build_stations.cc b/base/loader/src/build_stations.cc index 5b0cc7d2c..eed020230 100644 --- a/base/loader/src/build_stations.cc +++ b/base/loader/src/build_stations.cc @@ -65,7 +65,7 @@ struct stations_builder { s->length_ = fbs_station->lng(); s->eva_nr_ = std::string{sched_.prefixes_[source_schedule]} + fbs_station->id()->str(); - s->transfer_time_ = std::max(2, fbs_station->interchange_time()); + s->transfer_time_ = std::max(1, fbs_station->interchange_time()); s->timez_ = fbs_station->timezone() != nullptr ? get_or_create_timezone(fbs_station->timezone()) : nullptr; diff --git a/modules/csa/CMakeLists.txt b/modules/csa/CMakeLists.txt index 76bed449d..57ec35254 100644 --- a/modules/csa/CMakeLists.txt +++ b/modules/csa/CMakeLists.txt @@ -1,8 +1,6 @@ cmake_minimum_required(VERSION 3.10) project(motis) -include_directories(include) - file(GLOB_RECURSE motis-csa-files src/*.cc) add_library(motis-csa STATIC ${motis-csa-files}) target_include_directories(motis-csa PUBLIC include) @@ -12,20 +10,20 @@ target_link_libraries(motis-csa motis-routing motis-module motis-core -) + ) target_compile_options(motis-csa PRIVATE ${MOTIS_CXX_FLAGS}) target_compile_definitions(motis-csa PRIVATE ${MOTIS_COMPILE_DEFINITIONS}) if (MOTIS_CUDA) add_library(gpucsa SHARED src/gpu/gpu_csa.cu) set_target_properties(gpucsa PROPERTIES - WINDOWS_EXPORT_ALL_SYMBOLS ON - CUDA_SEPARABLE_COMPILATION ON - CUDA_STANDARD 14 - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} - LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} - INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/") + WINDOWS_EXPORT_ALL_SYMBOLS ON + CUDA_SEPARABLE_COMPILATION ON + CUDA_STANDARD 17 + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/") target_include_directories(gpucsa PUBLIC include) set_property(TARGET gpucsa PROPERTY CUDA_ARCHITECTURES 75 61) target_link_libraries(motis-csa gpucsa) -endif() +endif () diff --git a/modules/csa/src/gpu/gpu_csa.cu b/modules/csa/src/gpu/gpu_csa.cu index 8ab0f655c..5abe92854 100644 --- a/modules/csa/src/gpu/gpu_csa.cu +++ b/modules/csa/src/gpu/gpu_csa.cu @@ -15,15 +15,15 @@ extern "C" { #define STR(s) #s #define FMT_HUMAN_READABLE "%.1f%s" -#define HUMAN_READABLE(size) \ - ((size) > 1024 * 1024 * 1024) \ - ? (((float)(size)) / 1024 / 1024 / 1024) \ - : ((size) > 1024 * 1024) \ - ? (((float)(size)) / 1024 / 1024) \ - : ((size) > 1024) ? (((float)(size)) / 1024) : ((float)(size)), \ - ((size) > 1024 * 1024 * 1024) \ - ? "GB" \ - : ((size) > 1024 * 1024) ? "MB" : ((size) > 1024) ? "kb" : "b" +#define HUMAN_READABLE(size) \ + ((size) > 1024 * 1024 * 1024) ? (((float)(size)) / 1024 / 1024 / 1024) \ + : ((size) > 1024 * 1024) ? (((float)(size)) / 1024 / 1024) \ + : ((size) > 1024) ? (((float)(size)) / 1024) \ + : ((float)(size)), \ + ((size) > 1024 * 1024 * 1024) ? "GB" \ + : ((size) > 1024 * 1024) ? "MB" \ + : ((size) > 1024) ? "kb" \ + : "b" #define CUDA_CALL(call) \ if ((code = call) != cudaSuccess) { \ @@ -46,8 +46,8 @@ __host__ __device__ inline int divup(int a, int b) { // TIMETABLE //------------------------------------------------------------------------------ struct gpu_timetable { - struct gpu_csa_con* conns_; - uint32_t* bucket_starts_; + struct gpu_csa_con* conns_{nullptr}; + uint32_t* bucket_starts_{nullptr}; uint32_t station_count_, trip_count_, bucket_count_; }; @@ -59,6 +59,10 @@ struct gpu_timetable* create_csa_gpu_timetable( cudaError_t code; gpu_timetable* tt = static_cast(malloc(sizeof(gpu_timetable))); + if (tt == nullptr) { + printf("csa: malloc for gpu_timetable failed\n"); + return nullptr; + } tt->station_count_ = station_count; tt->trip_count_ = trip_count; @@ -69,15 +73,11 @@ struct gpu_timetable* create_csa_gpu_timetable( bucket_count); CUDA_COPY_TO_DEVICE(struct gpu_csa_con, tt->conns_, conns, conn_count); - printf("Schedule size on GPU: " FMT_HUMAN_READABLE "\n", - HUMAN_READABLE(device_bytes)); - return tt; fail: - if (tt != nullptr) { - cudaFree(tt->conns_); - } + cudaFree(tt->conns_); + cudaFree(tt->bucket_starts_); free(tt); return nullptr; } @@ -87,7 +87,9 @@ void free_csa_gpu_timetable(struct gpu_timetable* tt) { return; } cudaFree(tt->conns_); + cudaFree(tt->bucket_starts_); tt->conns_ = nullptr; + tt->bucket_starts_ = nullptr; tt->station_count_ = 0U; tt->trip_count_ = 0U; free(tt); diff --git a/modules/raptor/CMakeLists.txt b/modules/raptor/CMakeLists.txt new file mode 100644 index 000000000..6ba7b817a --- /dev/null +++ b/modules/raptor/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required(VERSION 3.20) +project(motis) + +file(GLOB_RECURSE motis-raptor-files src/*.cc) +add_library(motis-raptor STATIC ${motis-raptor-files}) +target_include_directories(motis-raptor PUBLIC include) +target_compile_features(motis-raptor PUBLIC cxx_std_17) +target_link_libraries(motis-raptor + motis-routing + motis-module + motis-core + ) +target_compile_options(motis-raptor PRIVATE ${MOTIS_CXX_FLAGS}) +target_compile_definitions(motis-raptor PRIVATE ${MOTIS_COMPILE_DEFINITIONS}) + +if (MOTIS_CUDA) + include(FindCUDAToolkit) + + file(GLOB_RECURSE gpu-raptor-files src/gpu/*.cu) + add_library(gpu-raptor SHARED ${gpu-raptor-files}) + + set_target_properties(gpu-raptor PROPERTIES + WINDOWS_EXPORT_ALL_SYMBOLS ON + CUDA_SEPARABLE_COMPILATION ON + CUDA_STANDARD 17 + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/" + ) + target_compile_definitions(gpu-raptor PRIVATE MOTIS_CUDA) + target_link_libraries(gpu-raptor utl motis-core) + target_include_directories(gpu-raptor PRIVATE include) + set_property(TARGET gpu-raptor PROPERTY CUDA_ARCHITECTURES 75 61) + target_compile_options(gpu-raptor PRIVATE "--expt-relaxed-constexpr") + target_link_libraries(motis-raptor gpu-raptor CUDA::cudart_static) +endif () \ No newline at end of file diff --git a/modules/raptor/include/motis/raptor/additional_start.h b/modules/raptor/include/motis/raptor/additional_start.h new file mode 100644 index 000000000..750a2a30b --- /dev/null +++ b/modules/raptor/include/motis/raptor/additional_start.h @@ -0,0 +1,23 @@ +#pragma once + +#include "motis/raptor/raptor_timetable.h" + +namespace motis::raptor { + +struct additional_start { + additional_start() = delete; + + stop_id s_id_; + time offset_; +}; + +std::vector get_add_starts(raptor_meta_info const& meta_info, + stop_id source, + bool use_start_footpaths, + bool use_start_metas); + +// returns the maximum amount of additional starts for a raptor query +// which is from a query using use_source_metas and use_start_footpaths +size_t get_max_add_starts(raptor_meta_info const& meta_info); + +} // namespace motis::raptor diff --git a/modules/raptor/include/motis/raptor/cpu/cpu_raptor.h b/modules/raptor/include/motis/raptor/cpu/cpu_raptor.h new file mode 100644 index 000000000..b84d5c5fa --- /dev/null +++ b/modules/raptor/include/motis/raptor/cpu/cpu_raptor.h @@ -0,0 +1,30 @@ +#pragma once + +#include "motis/raptor/cpu/mark_store.h" + +#include "motis/raptor/raptor_query.h" +#include "motis/raptor/raptor_result.h" +#include "motis/raptor/raptor_statistics.h" +#include "motis/raptor/raptor_timetable.h" + +namespace motis::raptor { + +trip_count get_earliest_trip(raptor_timetable const& tt, + raptor_route const& route, + time const* prev_arrivals, + stop_times_index r_stop_offset); + +void init_arrivals(raptor_result& result, raptor_query const& q, + cpu_mark_store& station_marks); + +void update_route(raptor_timetable const& tt, route_id r_id, + time const* prev_arrivals, time* current_round, + earliest_arrivals& ea, cpu_mark_store& station_marks); + +void update_footpaths(raptor_timetable const& tt, time* current_round, + earliest_arrivals const& ea, + cpu_mark_store& station_marks); + +void invoke_cpu_raptor(raptor_query const& query, raptor_statistics&); + +} // namespace motis::raptor \ No newline at end of file diff --git a/modules/raptor/include/motis/raptor/cpu/mark_store.h b/modules/raptor/include/motis/raptor/cpu/mark_store.h new file mode 100644 index 000000000..89ec4e99b --- /dev/null +++ b/modules/raptor/include/motis/raptor/cpu/mark_store.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace motis::raptor { + +using mark_index = uint32_t; + +struct cpu_mark_store { + explicit cpu_mark_store(mark_index size); + + void mark(mark_index index); + bool marked(mark_index index) const; + void reset(); + +private: + std::vector marks_; +}; + +} // namespace motis::raptor \ No newline at end of file diff --git a/modules/raptor/include/motis/raptor/get_raptor_timetable.h b/modules/raptor/include/motis/raptor/get_raptor_timetable.h new file mode 100644 index 000000000..f9eea4a9c --- /dev/null +++ b/modules/raptor/include/motis/raptor/get_raptor_timetable.h @@ -0,0 +1,11 @@ +#pragma once + +#include "motis/core/schedule/schedule.h" +#include "motis/raptor/raptor_timetable.h" + +namespace motis::raptor { + +std::pair, std::unique_ptr> +get_raptor_timetable(schedule const& sched); + +} // namespace motis::raptor diff --git a/modules/raptor/include/motis/raptor/gpu/cuda_util.h b/modules/raptor/include/motis/raptor/gpu/cuda_util.h new file mode 100644 index 000000000..3071ba41f --- /dev/null +++ b/modules/raptor/include/motis/raptor/gpu/cuda_util.h @@ -0,0 +1,32 @@ +#pragma once + +#include + +#include "cuda_runtime.h" + +namespace motis::raptor { + +#define cucheck_dev(call) \ + { \ + cudaError_t cucheck_err = (call); \ + if (cucheck_err != cudaSuccess) { \ + const char* err_str = cudaGetErrorString(cucheck_err); \ + printf("%s (%d): %s\n", __FILE__, __LINE__, err_str); \ + } \ + } + +#define cuda_check() \ + { cucheck_dev(cudaGetLastError()); } + +inline void cuda_sync_stream(cudaStream_t const& stream) { + cudaEvent_t event{}; + cudaEventCreateWithFlags(&event, + cudaEventBlockingSync | cudaEventDisableTiming); + cudaEventRecord(event, stream); + cudaEventSynchronize(event); + cudaEventDestroy(event); + + cuda_check(); +} + +} // namespace motis::raptor \ No newline at end of file diff --git a/modules/raptor/include/motis/raptor/gpu/gpu_raptor.cuh b/modules/raptor/include/motis/raptor/gpu/gpu_raptor.cuh new file mode 100644 index 000000000..211390c24 --- /dev/null +++ b/modules/raptor/include/motis/raptor/gpu/gpu_raptor.cuh @@ -0,0 +1,50 @@ +#pragma once + +#include "motis/raptor/raptor_query.h" + +namespace motis::raptor { + +template +void inline launch_kernel(Kernel kernel, void** args, + device_context const& device, cudaStream_t s) { + cudaSetDevice(device.id_); + + cudaLaunchCooperativeKernel((void*)kernel, device.grid_, // NOLINT + device.threads_per_block_, args, 0, s); + cuda_check(); +} + +inline void fetch_arrivals_async(d_query const& dq, cudaStream_t s) { + cudaMemcpyAsync( + dq.mem_->host_.result_->data(), dq.mem_->device_.result_.front(), + dq.mem_->host_.result_->byte_size(), cudaMemcpyDeviceToHost, s); + cuda_check(); +} + +inline void fetch_arrivals_async(d_query const& dq, raptor_round const round_k, + cudaStream_t s) { + cudaMemcpyAsync((*dq.mem_->host_.result_)[round_k], + dq.mem_->device_.result_[round_k], + dq.mem_->host_.result_->stop_count_ * sizeof(time), + cudaMemcpyDeviceToHost, s); + cuda_check(); +} + +__device__ void init_arrivals_dev(base_query const& query, + device_memory const& device_mem, + device_gpu_timetable const& tt); + +__device__ void update_routes_dev(time const* prev_arrivals, time* arrivals, + unsigned int* station_marks, + unsigned int* route_marks, + bool* any_station_marked, + device_gpu_timetable const& tt); + +__device__ void update_footpaths_dev(device_memory const& device_mem, + raptor_round round_k, + device_gpu_timetable const& tt); + +void invoke_gpu_raptor(d_query const&); +void invoke_hybrid_raptor(d_query const&); + +} // namespace motis::raptor \ No newline at end of file diff --git a/modules/raptor/include/motis/raptor/gpu/gpu_timetable.cuh b/modules/raptor/include/motis/raptor/gpu/gpu_timetable.cuh new file mode 100644 index 000000000..a09fc877b --- /dev/null +++ b/modules/raptor/include/motis/raptor/gpu/gpu_timetable.cuh @@ -0,0 +1,82 @@ +#pragma once + +#include + +#include "motis/raptor/raptor_timetable.h" + +namespace motis::raptor { + +using gpu_route = raptor_route; +using gpu_stop = raptor_stop; +using gpu_stop_time = stop_time; + +struct gpu_footpath { + gpu_footpath() + : from_{invalid}, + to_{-1}, + duration_{invalid} {} + + gpu_footpath(stop_id const from, stop_id const to, motis::time const duration) + : from_{from}, to_{to}, duration_{static_cast(duration)} { + utl::verify(duration < std::numeric_limits::max(), + "Footpath duration too long to fit inside time8"); + } + + stop_id from_; + stop_id to_ : 24; + time8 duration_; +}; + +struct host_gpu_timetable { + host_gpu_timetable() = default; + + // subtract the sentinel + auto stop_count() const { return stops_.size() - 1; } + auto route_count() const { return routes_.size() - 1; } + + std::vector stops_; + std::vector routes_; + std::vector footpaths_; + + std::vector stop_times_; + + std::vector route_stops_; + std::vector stop_routes_; + + std::vector stop_departures_; + std::vector stop_arrivals_; + + std::vector