diff --git a/src/core/elf.c b/src/core/elf.c
index 575e1c7..b203703 100644
--- a/src/core/elf.c
+++ b/src/core/elf.c
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 
 #include "core/elf.h"
 #include "debug/log.h"
@@ -427,3 +428,106 @@ void elf_resolve_interp(const char *sysroot,
     /* Strategy 3: use interp_path as-is */
     str_copy_trunc(out, interp_path, out_sz);
 }
+
+/* Parse the "#!" header of the script at host_path.
+ *
+ * Only the first 511 bytes of the file are read, so a longer shebang line is
+ * silently cut short. The interpreter path goes to interp_out; the remainder
+ * of the line is one optional argument (never split further) and goes to
+ * arg_out, which is set to "" when there is none.
+ *
+ * Returns 1 on success, 0 if the file does not start with "#!", or a negative
+ * errno: the open()/read() error, -EINVAL for a NULL pointer or zero-sized
+ * output buffer, -ENOEXEC for an empty interpreter or a result that does not
+ * fit its output buffer.
+ */
+int elf_parse_shebang(const char *host_path,
+                      char *interp_out,
+                      size_t interp_sz,
+                      char *arg_out,
+                      size_t arg_sz)
+{
+    if (!host_path || !interp_out || !arg_out || interp_sz == 0 ||
+        arg_sz == 0) {
+        return -EINVAL;
+    }
+
+    int fd = open(host_path, O_RDONLY);
+    if (fd < 0) {
+        return -errno;
+    }
+
+    char buf[512];
+    ssize_t nread = read(fd, buf, sizeof(buf) - 1);
+    /* close() may overwrite errno, so capture the read() error first */
+    int read_errno = errno;
+    close(fd);
+
+    if (nread < 0) {
+        return -read_errno;
+    }
+    if (nread < 2 || buf[0] != '#' || buf[1] != '!') {
+        return 0; /* Not a shebang script */
+    }
+    buf[nread] = '\0';
+
+    /* Keep only the first line: cut at the first \n, then at the first \r */
+    char *eol = strchr(buf + 2, '\n');
+    if (eol) {
+        *eol = '\0';
+    }
+    eol = strchr(buf + 2, '\r');
+    if (eol) {
+        *eol = '\0';
+    }
+
+    /* Skip blanks between "#!" and the interpreter path */
+    char *ptr = buf + 2;
+    while (*ptr == ' ' || *ptr == '\t') {
+        ptr++;
+    }
+
+    /* Right-trim the line; \r and \n were already cut away above */
+    size_t len = strlen(ptr);
+    while (len > 0 && (ptr[len - 1] == ' ' || ptr[len - 1] == '\t')) {
+        ptr[--len] = '\0';
+    }
+
+    if (len == 0) {
+        return -ENOEXEC; /* Empty shebang interpreter */
+    }
+
+    /* Split at the first blank: interpreter path, then one optional argument
+     * that is passed through whole (no shell-style splitting).
+     */
+    char *interp = ptr;
+    char *arg = NULL;
+    char *space = strpbrk(ptr, " \t");
+    if (space) {
+        *space = '\0';
+        arg = space + 1;
+        while (*arg == ' ' || *arg == '\t') {
+            arg++;
+        }
+        /* The line is already right-trimmed, so arg needs no trailing trim;
+         * the emptiness check is purely defensive.
+         */
+        if (*arg == '\0') {
+            arg = NULL;
+        }
+    }
+
+    if (str_copy_trunc(interp_out, interp, interp_sz) >= interp_sz) {
+        return -ENOEXEC; /* Buffer too small */
+    }
+
+    if (arg) {
+        if (str_copy_trunc(arg_out, arg, arg_sz) >= arg_sz) {
+            return -ENOEXEC; /* Buffer too small */
+        }
+    } else {
+        arg_out[0] = '\0';
+    }
+
+    return 1; /* Successfully parsed shebang */
+}
diff --git a/src/core/elf.h b/src/core/elf.h
index a8ce7ce..8af286c 100644
--- a/src/core/elf.h
+++ b/src/core/elf.h
@@ -136,6 +136,23 @@ void elf_resolve_interp(const char *sysroot,
                         char *out,
                         size_t out_sz);
 
+/* Read and parse a shebang ("#!") script header from host_path.
+ * Writes the interpreter path to interp_out and the single optional argument
+ * (if present) to arg_out. arg_out is set to an empty string if there is no
+ * optional argument.
+ * Returns:
+ *   1 if a shebang script was successfully parsed
+ *   0 if the file is not a shebang script
+ *   Negative errno on failure: the open()/read() error (e.g. -ENOENT),
+ *   -EINVAL for a NULL pointer or zero-sized output buffer, or -ENOEXEC if
+ *   the interpreter is empty or a result does not fit its output buffer
+ */
+int elf_parse_shebang(const char *host_path,
+                      char *interp_out,
+                      size_t interp_sz,
+                      char *arg_out,
+                      size_t arg_sz);
+
 /* Translate ELF program-header flags (PF_R=4, PF_W=2, PF_X=1) into the
  * R=1/W=2/X=4 bitset shared by both MEM_PERM_R/W/X (page-table permissions) and
  * LINUX_PROT_READ/WRITE/EXEC (mmap prot bits).
diff --git a/src/main.c b/src/main.c index 4d66e3f..eac1b1a 100644 --- a/src/main.c +++ b/src/main.c @@ -440,16 +440,125 @@ int main(int argc, char **argv) } proc_set_sysroot(sysroot); - if (resolve_guest_elf_host_path(elf_path, elf_host_path, - sizeof(elf_host_path), - &elf_host_temp) < 0) { - log_error("failed to resolve ELF path %s: %s", elf_path, - strerror(errno)); + + int shebang_depth = 0; + const int max_shebang_depth = 5; + + while (shebang_depth < max_shebang_depth) { + if (resolve_guest_elf_host_path(elf_path, elf_host_path, + sizeof(elf_host_path), + &elf_host_temp) < 0) { + log_error("failed to resolve ELF path %s: %s", elf_path, + strerror(errno)); + cleanup_main_resources(&g, guest_initialized, &sysroot_mount, + have_host_cwd ? host_cwd : NULL, guest_argv, + guest_argc, elf_path, sysroot_path); + if (elf_host_temp) + unlink(elf_host_path); + return 1; + } + + /* Check if the file starts with "#!" */ + char interp[LINUX_PATH_MAX]; + char arg[LINUX_PATH_MAX]; + int rc = elf_parse_shebang(elf_host_path, interp, sizeof(interp), arg, + sizeof(arg)); + if (rc == 0 || rc == -ENOENT) { + /* Not a shebang script, proceed to boot */ + break; + } + + if (rc < 0) { + log_error("empty or invalid shebang interpreter in %s", elf_path); + cleanup_main_resources(&g, guest_initialized, &sysroot_mount, + have_host_cwd ? host_cwd : NULL, guest_argv, + guest_argc, elf_path, sysroot_path); + if (elf_host_temp) + unlink(elf_host_path); + return 1; + } + + shebang_depth++; + + /* Prepend interpreter (and argument if present) to guest_argv */ + bool has_arg = (arg[0] != '\0'); + int add_count = has_arg ? 2 : 1; + int new_argc = guest_argc + add_count; + const char **new_argv = + (const char **) calloc((size_t) new_argc, sizeof(char *)); + if (!new_argv) { + log_error("out of memory"); + cleanup_main_resources(&g, guest_initialized, &sysroot_mount, + have_host_cwd ? 
host_cwd : NULL, guest_argv, + guest_argc, elf_path, sysroot_path); + if (elf_host_temp) + unlink(elf_host_path); + return 1; + } + + new_argv[0] = strdup(interp); + if (!new_argv[0]) { + log_error("out of memory"); + free((void *) new_argv); + cleanup_main_resources(&g, guest_initialized, &sysroot_mount, + have_host_cwd ? host_cwd : NULL, guest_argv, + guest_argc, elf_path, sysroot_path); + if (elf_host_temp) + unlink(elf_host_path); + return 1; + } + if (has_arg) { + new_argv[1] = strdup(arg); + if (!new_argv[1]) { + log_error("out of memory"); + free((void *) new_argv[0]); + free((void *) new_argv); + cleanup_main_resources(&g, guest_initialized, &sysroot_mount, + have_host_cwd ? host_cwd : NULL, + guest_argv, guest_argc, elf_path, + sysroot_path); + if (elf_host_temp) + unlink(elf_host_path); + return 1; + } + } + + /* Transfer ownership of the previous guest_argv elements */ + for (int i = 0; i < guest_argc; i++) { + new_argv[i + add_count] = guest_argv[i]; + } + + free((void *) guest_argv); + guest_argv = new_argv; + guest_argc = new_argc; + + /* Update elf_path to point to the interpreter path */ + char *new_elf_path = strdup(interp); + if (!new_elf_path) { + log_error("out of memory"); + cleanup_main_resources(&g, guest_initialized, &sysroot_mount, + have_host_cwd ? host_cwd : NULL, guest_argv, + guest_argc, elf_path, sysroot_path); + if (elf_host_temp) + unlink(elf_host_path); + return 1; + } + free(elf_path); + elf_path = new_elf_path; + + /* Clean up any materialized temp file before resolving the next path */ + if (elf_host_temp) { + unlink(elf_host_path); + elf_host_temp = false; + } + } + + if (shebang_depth >= max_shebang_depth) { + log_error("too many levels of shebang recursion (max %d) resolving %s", + max_shebang_depth, argv[arg_start]); cleanup_main_resources(&g, guest_initialized, &sysroot_mount, have_host_cwd ? 
host_cwd : NULL, guest_argv, guest_argc, elf_path, sysroot_path); - if (elf_host_temp) - unlink(elf_host_path); return 1; } diff --git a/src/syscall/exec.c b/src/syscall/exec.c index ef57f7f..d1eb58d 100644 --- a/src/syscall/exec.c +++ b/src/syscall/exec.c @@ -332,69 +332,33 @@ int64_t sys_execve(hv_vcpu_t vcpu, /* Not a valid ELF. Check if it's a script with a shebang line. Read the * first 256 bytes and look for "#!" at the start. */ - int script_fd = open(path_host, O_RDONLY); - if (script_fd < 0) { - err = -LINUX_ENOENT; + char interp_start[256]; + char interp_arg[256]; + int rc = + elf_parse_shebang(path_host, interp_start, sizeof(interp_start), + interp_arg, sizeof(interp_arg)); + if (rc < 0) { + if (rc == -ENOENT) { + err = -LINUX_ENOENT; + } else { + err = -LINUX_ENOEXEC; + } goto fail; } - char shebang_buf[256]; - ssize_t nread = read(script_fd, shebang_buf, sizeof(shebang_buf) - 1); - close(script_fd); - - if (nread < 2 || shebang_buf[0] != '#' || shebang_buf[1] != '!') { + if (rc == 0) { err = -LINUX_ENOEXEC; goto fail; } - shebang_buf[nread] = '\0'; - - /* Ignore script bytes after the first line; only the shebang line - * contributes interpreter arguments. - */ - char *eol = strchr(shebang_buf + 2, '\n'); - if (eol) - *eol = '\0'; - /* Parse interpreter path and optional argument. Format: "#! - * /path/to/interpreter [optional-arg]" - */ - char *interp_start = shebang_buf + 2; - while (*interp_start == ' ' || *interp_start == '\t') - interp_start++; - if (*interp_start == '\0') { - err = -LINUX_ENOEXEC; - goto fail; - } - - /* Linux preserves one optional shebang argument as a single argv - * element, without shell-style splitting. 
- */ - char *interp_arg = NULL; - char *space = interp_start; - while (*space && *space != ' ' && *space != '\t') - space++; - if (*space) { - *space = '\0'; - interp_arg = space + 1; - while (*interp_arg == ' ' || *interp_arg == '\t') - interp_arg++; - if (*interp_arg == '\0') - interp_arg = NULL; - /* Trim the line ending from the optional argument. */ - if (interp_arg) { - char *end = interp_arg + strlen(interp_arg) - 1; - while (end > interp_arg && - (*end == ' ' || *end == '\t' || *end == '\r')) - *end-- = '\0'; - } - } + bool has_arg = (interp_arg[0] != '\0'); log_debug("execve: shebang interp=\"%s\" arg=\"%s\" script=\"%s\"", - interp_start, interp_arg ? interp_arg : "(none)", path); + interp_start, has_arg ? interp_arg : "(none)", path); /* Rebuild argv: [interpreter, optional-arg, script-path, * original-argv[1:]] */ - int new_argc = 1 + (interp_arg ? 1 : 0) + 1 + (argc > 1 ? argc - 1 : 0); + int new_argc = 1 + (has_arg ? 1 : 0) + 1 + (argc > 1 ? argc - 1 : 0); if (new_argc > MAX_ARGS) { err = -LINUX_E2BIG; goto fail; @@ -407,7 +371,7 @@ int64_t sys_execve(hv_vcpu_t vcpu, char *new_argv[MAX_ARGS + 3]; int ni = 0; new_argv[ni++] = interp_start; - if (interp_arg) + if (has_arg) new_argv[ni++] = interp_arg; new_argv[ni++] = path; for (int i = 1; i < argc; i++) diff --git a/src/syscall/net.c b/src/syscall/net.c index e50a10d..2aabd92 100644 --- a/src/syscall/net.c +++ b/src/syscall/net.c @@ -92,6 +92,11 @@ int64_t sys_socket(guest_t *g, int domain, int type, int protocol) int nonblock = extract_sock_nonblock(type); int cloexec = extract_sock_cloexec(type); + int original_type = real_type; + if (mac_domain == AF_UNIX && real_type == LINUX_SOCK_SEQPACKET) { + real_type = SOCK_STREAM; + } + /* Rosetta opens AF_UNIX SOCK_SEQPACKET to talk to rosettad. macOS does not * support SOCK_SEQPACKET on AF_UNIX, so while the translator process is * active we create an unconnected SOCK_STREAM placeholder instead. 
@@ -100,7 +105,7 @@ int64_t sys_socket(guest_t *g, int domain, int type, int protocol) * so unrelated Unix IPC is not silently downgraded to STREAM. */ if (rosetta_socket_shim_enabled(g) && mac_domain == AF_UNIX && - real_type == LINUX_SOCK_SEQPACKET) { + original_type == LINUX_SOCK_SEQPACKET) { int fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) return linux_errno(); @@ -119,7 +124,7 @@ int64_t sys_socket(guest_t *g, int domain, int type, int protocol) } if (cloexec) fd_table[gfd].linux_flags |= LINUX_O_CLOEXEC; - net_socket_cache_init_defaults(gfd, domain, real_type); + net_socket_cache_init_defaults(gfd, domain, original_type); return gfd; } @@ -148,7 +153,7 @@ int64_t sys_socket(guest_t *g, int domain, int type, int protocol) if (cloexec) linux_flags |= LINUX_O_CLOEXEC; fd_table[gfd].linux_flags = linux_flags; - net_socket_cache_init_defaults(gfd, domain, real_type); + net_socket_cache_init_defaults(gfd, domain, original_type); return gfd; } @@ -164,6 +169,11 @@ int64_t sys_socketpair(guest_t *g, int nonblock = extract_sock_nonblock(type); int cloexec = extract_sock_cloexec(type); + int original_type = real_type; + if (mac_domain == AF_UNIX && real_type == LINUX_SOCK_SEQPACKET) { + real_type = SOCK_DGRAM; + } + int fds[2]; if (socketpair(mac_domain, real_type, protocol, fds) < 0) return linux_errno(); @@ -197,8 +207,8 @@ int64_t sys_socketpair(guest_t *g, int linux_flags = cloexec ? 
LINUX_O_CLOEXEC : 0; fd_table[gfd0].linux_flags = linux_flags; fd_table[gfd1].linux_flags = linux_flags; - net_socket_cache_init_defaults(gfd0, domain, real_type); - net_socket_cache_init_defaults(gfd1, domain, real_type); + net_socket_cache_init_defaults(gfd0, domain, original_type); + net_socket_cache_init_defaults(gfd1, domain, original_type); int32_t guest_fds[2] = {gfd0, gfd1}; if (guest_write_small(g, sv_gva, guest_fds, sizeof(guest_fds)) < 0) { diff --git a/tests/test-socket.c b/tests/test-socket.c index 1567a1c..ac3fd83 100644 --- a/tests/test-socket.c +++ b/tests/test-socket.c @@ -382,6 +382,75 @@ int main(void) close(sv[0]); close(sv[1]); + /* Test 12: SOCK_SEQPACKET UNIX socketpair */ + printf("test-socket: 12. socketpair(AF_UNIX, SOCK_SEQPACKET)... "); + int seq_sv[2]; + if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, seq_sv) < 0) { + printf("FAIL (socketpair: %m)\n"); + failures++; + } else { + const char *seq_msg1 = "msg1"; + const char *seq_msg2 = "longer_msg2"; + if (write(seq_sv[0], seq_msg1, strlen(seq_msg1)) != + (ssize_t) strlen(seq_msg1) || + write(seq_sv[0], seq_msg2, strlen(seq_msg2)) != + (ssize_t) strlen(seq_msg2)) { + printf("FAIL (write)\n"); + failures++; + } else { + char seq_buf1[64] = {0}; + char seq_buf2[64] = {0}; + ssize_t seq_n1 = read(seq_sv[1], seq_buf1, sizeof(seq_buf1) - 1); + ssize_t seq_n2 = read(seq_sv[1], seq_buf2, sizeof(seq_buf2) - 1); + if (seq_n1 == (ssize_t) strlen(seq_msg1) && + !memcmp(seq_buf1, seq_msg1, strlen(seq_msg1)) && + seq_n2 == (ssize_t) strlen(seq_msg2) && + !memcmp(seq_buf2, seq_msg2, strlen(seq_msg2))) { + int seq_type = 0; + socklen_t seq_optlen = sizeof(seq_type); + if (getsockopt(seq_sv[0], SOL_SOCKET, SO_TYPE, &seq_type, + &seq_optlen) < 0) { + printf("FAIL (getsockopt SO_TYPE: %m)\n"); + failures++; + } else if (seq_type == SOCK_SEQPACKET) { + printf("PASS\n"); + } else { + printf("FAIL (type=%d, expected %d)\n", seq_type, + SOCK_SEQPACKET); + failures++; + } + } else { + printf("FAIL (read: n1=%zd got 
'%s', n2=%zd got '%s')\n", + seq_n1, seq_buf1, seq_n2, seq_buf2); + failures++; + } + } + close(seq_sv[0]); + close(seq_sv[1]); + } + + /* Test 13: socket(AF_UNIX, SOCK_SEQPACKET, 0) */ + printf("test-socket: 13. socket(AF_UNIX, SOCK_SEQPACKET)... "); + int seq_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (seq_fd < 0) { + printf("FAIL (socket: %m)\n"); + failures++; + } else { + int seq_type = 0; + socklen_t seq_optlen = sizeof(seq_type); + if (getsockopt(seq_fd, SOL_SOCKET, SO_TYPE, &seq_type, &seq_optlen) < + 0) { + printf("FAIL (getsockopt: %m)\n"); + failures++; + } else if (seq_type == SOCK_SEQPACKET) { + printf("PASS\n"); + } else { + printf("FAIL (type=%d, expected %d)\n", seq_type, SOCK_SEQPACKET); + failures++; + } + close(seq_fd); + } + if (failures == 0) { printf("test-socket: all tests passed -- PASS\n"); return 0;