From 73a124cfb5636fbaec2794a7c43147cade4f5278 Mon Sep 17 00:00:00 2001 From: ur4t <46435411+ur4t@users.noreply.github.com> Date: Thu, 19 Oct 2023 18:10:18 +0800 Subject: [PATCH] Support portable boot files on more platforms --- c/build.zuo | 2 +- c/externs.h | 3 + c/scheme.c | 65 ++++----- c/self-exe.c | 377 ++++++++++++++++++++++++++++++++++++++++++++++++++ csug/use.stex | 6 +- 5 files changed, 414 insertions(+), 39 deletions(-) create mode 100644 c/self-exe.c diff --git a/c/build.zuo b/c/build.zuo index f0e3a72f0..a92875cbe 100644 --- a/c/build.zuo +++ b/c/build.zuo @@ -79,7 +79,7 @@ "number.c" "schsig.c" "io.c" "new-io.c" "print.c" "fasl.c" "vfasl.c" "stats.c" "foreign.c" "prim.c" "prim5.c" "flushcache.c" "schlib.c" "thread.c" "expeditor.c" "scheme.c" "compress-io.c" - "random.c" "ffi.c" + "random.c" "ffi.c" "self-exe.c" arch.c))) (define kernel-srcs ((add-extra-c-sources at-dir mboot) diff --git a/c/externs.h b/c/externs.h index 0dee58e5c..229e1fad4 100644 --- a/c/externs.h +++ b/c/externs.h @@ -478,5 +478,8 @@ extern void S_ffi_call(ptr types, ptr proc, ptr *stack); extern ptr S_ffi_closure(ptr types, ptr proc); #endif +/* self-exe.c */ +extern char *S_get_process_executable_path(const char *execpath); + /* statics.c */ extern void scheme_statics(void); diff --git a/c/scheme.c b/c/scheme.c index bb2ec4871..8b0077c42 100644 --- a/c/scheme.c +++ b/c/scheme.c @@ -460,7 +460,8 @@ void S_generic_invoke(ptr tc, ptr code) { /* MISCELLANEOUS HELPERS */ /* locally defined functions */ -static IBOOL next_path(char *path, const char *name, const char *ext, const char **sp, const char **dsp); +static IBOOL next_path(const char *execpath, char *path, const char *name, + const char *ext, const char **sp, const char **dsp); static const char *path_last(const char *path); static char *get_defaultheapdirs(void); @@ -526,7 +527,8 @@ static char *get_defaultheapdirs() { * the search path. path should be a pointer to an unoccupied buffer * BOOT_PATH_MAX characters long. 
either or both of sp/dsp may be empty,
  * but neither may be null, i.e., (char *)0. */
-static IBOOL next_path(char *path, const char *name, const char *ext,
+static IBOOL next_path(const char *execpath, char *path,
+                       const char *name, const char *ext,
                        const char **sp, const char **dsp) {
   char *p;
   const char *s, *t;
@@ -542,26 +544,23 @@ static IBOOL next_path(char *path, const char *name, const char *ext,
         case '%':
           s += 1;
           switch (*s) {
-#ifdef WIN32
             case 'x': {
-              wchar_t exepath[BOOT_PATH_MAX]; DWORD n;
               s += 1;
-              n = GetModuleFileNameW(NULL, exepath, BOOT_PATH_MAX);
-              if (n == 0 || (n == BOOT_PATH_MAX && GetLastError() == ERROR_INSUFFICIENT_BUFFER)) {
-                fprintf(stderr, "warning: executable path is too long; ignoring %%x\n");
-              } else {
-                char *tstart;
-                const char *tend;
-                tstart = Swide_to_utf8(exepath);
-                t = tstart;
-                tend = path_last(t);
-                if (tend != t) tend -= 1; /* back up to directory separator */
-                while (t != tend) setp(*t++);
-                free(tstart);
+              char *tstart = S_get_process_executable_path(execpath);
+              if (tstart == NULL) {
+#ifdef WIN32
+                fprintf(stderr, "warning: failed to get executable path (%s); ignoring %%x\n", "Path is too long");
+#else
+                fprintf(stderr, "warning: failed to get executable path (%s); ignoring %%x\n", strerror(errno));
+#endif
+                break; /* nothing to substitute for %x: skip the copy below instead of using a NULL path */
               }
+              const char *tend = path_last(tstart);
+              if (tend != tstart) tend -= 1; /* back up to directory separator */
+              for (t = tstart; t != tend; ) setp(*t++); /* copy the executable's directory into path */
+              free(tstart);
               break;
             }
-#endif
             case 'm':
               s += 1;
               t = MACHINE_TYPE;
@@ -704,7 +703,7 @@ static void finish_dependencies_header(int fd, const char *path, int c) {
   }
 }
 
-static IBOOL find_boot(const char *name, const char *ext, IBOOL direct_pathp,
+static IBOOL find_boot(const char *execpath, const char *name, const char *ext, IBOOL direct_pathp,
                        int fd,
                        IBOOL errorp) {
   char pathbuf[BOOT_PATH_MAX], buf[BOOT_PATH_MAX];
@@ -746,7 +745,7 @@ static IBOOL find_boot(const char *name, const char *ext, IBOOL direct_pathp,
 
     path = pathbuf;
     while (1) {
-      if
(!next_path(pathbuf, name, ext, &sp, &dsp)) { + if (!next_path(execpath, pathbuf, name, ext, &sp, &dsp)) { if (errorp) { fprintf(stderr, "cannot find compatible boot file %s%s in search path:\n \"%s%s\"\n", name, ext, @@ -793,7 +792,7 @@ static IBOOL find_boot(const char *name, const char *ext, IBOOL direct_pathp, CLOSE(fd); S_abnormal_exit(); } - if (find_boot(buf, ".boot", 0, -1, 0)) break; + if (find_boot(execpath, buf, ".boot", 0, -1, 0)) break; if (c == ')') { char *sep; char *wastebuf[8]; fprintf(stderr, "cannot find subordinate boot file"); @@ -1014,7 +1013,7 @@ extern void Sscheme_init(void (*abnormal_exit)(void)) { S_pagesize = GETPAGESIZE(); idiot_checks(); - + switch (current_state) { case RUNNING: fprintf(stderr, "error (Sscheme_init): call Sscheme_deinit first to terminate\n"); @@ -1084,17 +1083,17 @@ static void check_boot_file_state(const char *who) { extern void Sregister_boot_file(const char *name) { check_boot_file_state("Sregister_boot_file"); - find_boot(name, "", 0, -1, 1); + find_boot("", name, "", 0, -1, 1); } extern void Sregister_boot_direct_file(const char *name) { check_boot_file_state("Sregister_boot_direct_file"); - find_boot(name, "", 1, -1, 1); + find_boot("", name, "", 1, -1, 1); } extern void Sregister_boot_file_fd(const char *name, int fd) { check_boot_file_state("Sregister_boot_file_fd"); - find_boot(name, "", 1, fd, 1); + find_boot("", name, "", 1, fd, 1); } extern void Sregister_boot_file_fd_region(const char *name, @@ -1123,14 +1122,10 @@ extern void Sregister_heap_file(UNUSED const char *path) { S_abnormal_exit(); } -extern void Sbuild_heap(const char *kernel, void (*custom_init)(void)) { +extern void Sbuild_heap(const char *execpath, void (*custom_init)(void)) { ptr tc = Svoid; /* initialize to make gcc happy */ ptr p; -#if defined(ALWAYS_USE_BOOT_FILE) - kernel = ALWAYS_USE_BOOT_FILE; -#endif - switch (current_state) { case UNINITIALIZED: case DEINITIALIZED: @@ -1149,14 +1144,14 @@ extern void Sbuild_heap(const char *kernel, 
void (*custom_init)(void)) {
   S_boot_time = 1;
 
   if (boot_count == 0) {
-    const char *name;
-
-    if (!kernel) {
+    const char *name = execpath ? path_last(execpath) : NULL; /* test for NULL before path_last, as the old !kernel check did */
+#if defined(ALWAYS_USE_BOOT_FILE)
+    name = ALWAYS_USE_BOOT_FILE;
+#endif
+    if (!name) {
       fprintf(stderr, "no boot file or executable name specified\n");
       S_abnormal_exit();
     }
-
-    name = path_last(kernel);
     if (strlen(name) >= BOOT_PATH_MAX) {
       fprintf(stderr, "executable name too long: %s\n", name);
       S_abnormal_exit();
@@ -1176,7 +1171,7 @@ extern void Sbuild_heap(const char *kernel, void (*custom_init)(void)) {
     }
 #endif
 
-    if (!find_boot(name, ".boot", 0, -1, 0)) {
+    if (!find_boot(execpath, name, ".boot", 0, -1, 0)) {
       fprintf(stderr, "cannot find compatible %s.boot in search path\n \"%s%s\"\n",
               name,
               Sschemeheapdirs, Sdefaultheapdirs);
diff --git a/c/self-exe.c b/c/self-exe.c
new file mode 100644
index 000000000..f8a7c0bf2
--- /dev/null
+++ b/c/self-exe.c
@@ -0,0 +1,377 @@
+/*
+   This file is meant to be standalone and suitable for use in
+   programs other than Chez Scheme.
+
+   Defines
+
+     self_exe_t get_process_executable_path(const char *exec_file);
+
+   which takes argv[0] as supplied to `main` and returns an improved
+   representation of the containing executable. Not all platforms use
+   the `exec_file` argument; in particular, it is not used on Windows
+   or macOS. At worst, on Unix, uses `PATH` to convert `exec_file`
+   into a path.
+
+   The result type `self_exe_t` is normally `char *`, but on Windows
+   it's `wchar_t *` when `SELF_EXE_WINDOWS_AS_UTF16` is defined. In
+   all cases, the result is `malloc`ed, so it can be `free`d.
+
+   If `SELF_EXE_MAIN` is defined, then `main` is defined to call
+   and print the result from `get_process_executable_path`, which
+   is useful for testing.
+
+   Parts of the implementation here are from the LLVM Project under
+   the Apache License v2.0 with LLVM Exceptions.
+*/ + +/* `undef`ed below if not needed */ +#define USE_GENERIC_GET_SELF_PATH + +#include +#include + +#ifndef WIN32 +# if defined(_MSC_VER) || defined(__MINGW32__) +# define WIN32 +# endif +#endif + +#ifdef WIN32 +# include +#endif + +#if defined(WIN32) && defined(SELF_EXE_WINDOWS_AS_UTF16) +typedef wchar_t *self_exe_t; +#else +typedef char *self_exe_t; +#endif + +#if defined(__linux__) || defined(__GNU__) +# define USE_PROC_SELF_EXE_FILESYSTEM_PATH "/proc/self/exe" +#endif + +/* From LLVM: (but removed OpenBSD, which doesn't have "/proc") */ +#if defined(__NetBSD__) || defined(__minix) || \ + defined(__DragonFly__) || defined(__FreeBSD_kernel__) || defined(_AIX) +# define USE_PROC_SELF_EXE_FILESYSTEM_PATH "/proc/curproc/file" +#endif + +#if defined(USE_PROC_SELF_EXE_FILESYSTEM_PATH) +# include +# include +# define GENERIC_GET_SELF_PATH_NAME generic_get_self_path +static char *GENERIC_GET_SELF_PATH_NAME(const char *exec_file); +static char *get_process_executable_path(const char *exec_file) +{ + char *s; + ssize_t len, blen = 256; + + s = malloc(blen); + + while (1) { + len = readlink(USE_PROC_SELF_EXE_FILESYSTEM_PATH, s, blen-1); + if (len == (blen-1)) { + free(s); + blen *= 2; + s = malloc(blen); + } else if (len < 0) { + /* possibly in a chroot environment where "/proc" is not + available, so fall back to generic approach: */ + free(s); + return generic_get_self_path(exec_file); + } else + break; + } + s[len] = 0; + +#if defined(__GNU__) + /* From LLVM comments: */ + /* [...] on GNU/Hurd, /proc/self/exe is a symlink to the path that was used to start + the program, and not the eventual binary file. Therefore, call realpath [...] 
*/ + { + char *link_path; + link_path = realpath(s, NULL); + if (link_path) { + free(s); + return link_path; + } + } +#endif + + return s; +} +#endif + +#if defined(__FreeBSD__) +# include +# if __FreeBSD_version >= 1300057 +# include +# include +# else +# include +extern char **environ; +# endif +# define GENERIC_GET_SELF_PATH_NAME generic_get_self_path +static char *GENERIC_GET_SELF_PATH_NAME(const char *exec_file); +static char *get_process_executable_path(const char *exec_file) +{ + /* From LLVM, including most comments: */ + + /* On FreeBSD if the exec path specified in ELF auxiliary vectors is + preferred, if available. /proc/curproc/file and the KERN_PROC_PATHNAME + sysctl may not return the desired path if there are multiple hardlinks + to the file. */ +#if __FreeBSD_version >= 1300057 + char exe_path[PATH_MAX]; + if (elf_aux_info(AT_EXECPATH, exe_path, sizeof(exe_path)) == 0) { + char *link_path; + link_path = realpath(exe_path, NULL); + if (link_path) + return link_path; + } +#else + /* elf_aux_info(AT_EXECPATH, ... is not available in all supported versions, + fall back to finding the ELF auxiliary vectors after the process's + environment. */ + char **p = environ; + while (*p++ != 0) + ; + /* Iterate through auxiliary vectors for AT_EXECPATH. 
*/
+  for (Elf_Auxinfo *aux = (Elf_Auxinfo *)p; aux->a_type != AT_NULL; aux++) {
+    if (aux->a_type == AT_EXECPATH) {
+      char *link_path;
+      link_path = realpath((char *)aux->a_un.a_ptr, NULL);
+      if (link_path)
+        return link_path;
+    }
+  }
+#endif
+  /* Fallback: */
+  return generic_get_self_path(exec_file);
+}
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__)
+static char *get_process_executable_path(const char *exec_file)
+{
+  char buf[1024], *s;
+  uint32_t size = sizeof(buf);
+  int r;
+
+  r = _NSGetExecutablePath(buf, &size);
+  if (!r)
+    return strdup(buf);
+  else {
+    s = malloc(size);
+    if (s && !_NSGetExecutablePath(s, &size))
+      return s;
+    free(s); /* retry failed (or allocation did): release the buffer before falling back */
+    return strdup(exec_file);
+  }
+}
+# undef USE_GENERIC_GET_SELF_PATH
+#endif
+
+#ifdef WIN32
+self_exe_t get_process_executable_path(const char *exec_file)
+{
+  wchar_t *path;
+  DWORD r, sz = 1024;
+
+  while (1) {
+    path = (wchar_t *)malloc(sz * sizeof(wchar_t));
+    r = path ? GetModuleFileNameW(NULL, path, sz) : 0;
+    if (r > 0 && r < sz)
+      break; /* the whole path fit, terminator included */
+    free(path);
+    if (r == 0) /* allocation or lookup failed: report NULL rather than convert an unset buffer */
+      return NULL;
+    sz = 2 * sz; /* r == sz means the path was truncated; retry with a larger buffer */
+  }
+
+#ifndef SELF_EXE_WINDOWS_AS_UTF16
+  {
+    char *r;
+    int len;
+    len = WideCharToMultiByte(CP_UTF8, 0, path, -1, NULL, 0, NULL, NULL);
+    r = malloc(len);
+    len = WideCharToMultiByte(CP_UTF8, 0, path, -1, r, len, NULL, NULL);
+    free(path);
+    return r;
+  }
+#else
+  return path;
+#endif
+}
+# undef USE_GENERIC_GET_SELF_PATH
+#endif
+
+#if defined(USE_GENERIC_GET_SELF_PATH) || defined(USE_EXE_LOOKUP_VIA_PATH)
+
+/* Get executable path via argv[0] and the `PATH` environment variable */
+
+# include <errno.h>
+# include <unistd.h>
+
+static int has_slash(const char *s)
+{
+  while (*s) {
+    if (s[0] == '/')
+      return 1;
+    s++;
+  }
+  return 0;
+}
+
+static char *do_path_append(const char *s1, int l1, const char *s2)
+{
+  int l2;
+  char *s;
+
+  l2 = strlen(s2);
+
+  s = (char *)malloc(l1 + l2 + 2);
+
+  memcpy(s, s1, l1);
+  if (l1 > 0 && s[l1 - 1] != '/') { /* an empty prefix has no last character to inspect */
+    s[l1++] = '/';
+  }
+
+  memcpy(s + l1, s2, l2);
+  s[l1 + l2] = 0;
+
+  return s;
+}
+
+static char *path_append(const char *s1, const char *s2)
+{
+  return do_path_append(s1, strlen(s1), s2);
+}
+
+static char *copy_string(const char *s1)
+{
+  if (!s1) return NULL;
+  return strdup(s1);
+}
+
+static int executable_exists(const char *path)
+{
+  return (access(path, X_OK) == 0);
+}
+
+static char *get_current_directory(void)
+{
+  char *dir;
+
+  dir = getcwd(NULL, 0);
+
+  if (dir == NULL) {
+    /* Probably an old system where you have to allocate space yourself */
+    char *s;
+    int len = 256;
+
+    s = malloc(len);
+    while (1) {
+      dir = getcwd(s, len);
+      if (dir != NULL)
+        break;
+      if (errno == ERANGE) {
+        free(s);
+        len *= 2;
+        s = malloc(len);
+      } else
+        break;
+    }
+
+    if (dir == NULL) {
+      /* Still failed, so give up with "." as the path */
+      s[0] = '.';
+      s[1] = 0;
+      dir = s;
+    }
+  }
+
+  return dir;
+}
+
+static char *lookup_exe_via_path(const char *exec_file)
+{
+  if (exec_file[0] == '/') {
+    /* Absolute path */
+    return copy_string(exec_file);
+  } else if (has_slash(exec_file)) {
+    /* Relative path with a directory: */
+    char *dir, *r;
+    dir = get_current_directory();
+    r = path_append(dir, exec_file);
+    free(dir);
+    return r;
+  } else {
+    /* We have to find the executable by searching PATH: */
+    char *path = copy_string(getenv("PATH")), *p, *m, *saved_path = path;
+    int more;
+
+    if (!path) {
+      path = "";
+    }
+
+    while (1) {
+      /* Try each element of path: */
+      for (p = path; *p && (*p != ':'); p++) { }
+      if (*p) {
+        *p = 0;
+        more = 1;
+      } else
+        more = 0;
+
+      if (!*path)
+        break;
+
+      m = path_append(path, exec_file);
+
+      if (executable_exists(m)) {
+        if (m[0] != '/') {
+          char *cwd = get_current_directory(), *old_m = m;
+          m = path_append(cwd, old_m); /* make the relative PATH hit absolute */
+          free(old_m); free(cwd); /* both are malloc'ed; cwd was previously leaked */
+        }
+        if (saved_path) free(saved_path);
+        return m;
+      }
+      free(m);
+
+      if (more)
+        path = p + 1;
+      else
+        break;
+    }
+
+    if (saved_path) free(saved_path);
+
+    return copy_string(exec_file);
+  }
+}
+#endif
+
+#ifdef USE_GENERIC_GET_SELF_PATH
+# ifndef GENERIC_GET_SELF_PATH_NAME
+#  define
GENERIC_GET_SELF_PATH_NAME get_process_executable_path +# endif +static char *GENERIC_GET_SELF_PATH_NAME(const char *exec_file) +{ + return lookup_exe_via_path(exec_file); +} +#endif + +char *S_get_process_executable_path(const char *exec_file) +{ + return get_process_executable_path(exec_file); +} + +#ifdef SELF_EXE_MAIN +# include +int main(int argc, char **argv) +{ + printf("%s\n", get_process_executable_path(argv[0])); +} +#endif diff --git a/csug/use.stex b/csug/use.stex index 549e04d83..3dd553c02 100644 --- a/csug/use.stex +++ b/csug/use.stex @@ -1844,6 +1844,8 @@ the order in which they should be searched. Within each directory, the two-character escape sequence ``\scheme{%v}'' is replaced by the current version, and the two-character escape sequence ``\scheme{%m}'' is replaced by the machine type. +On supported platforms, the two-character escape sequence ``\scheme{%x}'' +is replaced by the directory in which the executable file resides. A percent followed by any other character is replaced by the second character; in particular, ``\scheme{%%}'' is replaced by ``\scheme{%}'', and ``\scheme{%:}'' is replaced by ``\scheme{:}''. @@ -1851,9 +1853,7 @@ If \scheme{SCHEMEHEAPDIRS} ends in a non-escaped colon, the default directories searched after those in \scheme{SCHEMEHEAPDIRS}; otherwise, only those listed in \scheme{SCHEMEHEAPDIRS} are searched. -Under Windows, semi-colons are used in place of colons, and one additional -escape is recognized: ``\scheme{%x},'' which is replaced by the directory in -which the executable file resides. +Under Windows, semi-colons are used in place of colons. The default search path under Windows consists of ``\scheme{%x}'' and ``\scheme{%x\..\..\boot\%m}.'' The registry key \scheme{HeapSearchPath} in