Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions src/core/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <errno.h>

#include "core/elf.h"
#include "debug/log.h"
Expand Down Expand Up @@ -427,3 +428,90 @@ void elf_resolve_interp(const char *sysroot,
/* Strategy 3: use interp_path as-is */
str_copy_trunc(out, interp_path, out_sz);
}

/* Read the start of host_path and, if it begins with "#!", split the first
 * line into an interpreter path and at most one optional argument.
 *
 * The line ends at the first '\n' or '\r'. Blanks (space/tab) around the
 * interpreter are dropped; everything after the interpreter, with surrounding
 * blanks removed, is returned as ONE argument and is not split further.
 *
 * Only the first sizeof(buf) - 1 bytes of the file are examined, so a longer
 * header line is cut off at that point.
 *
 * host_path   path of the file to inspect (opened read-only)
 * interp_out  receives the interpreter path (interp_sz bytes available)
 * arg_out     receives the optional argument, or "" when there is none
 *             (arg_sz bytes available)
 *
 * Returns:
 *    1  a shebang header was parsed and the outputs were filled in
 *    0  the file does not start with "#!" (outputs are left untouched)
 *   <0  -errno from open()/read(), or -ENOEXEC when the interpreter is empty
 *       or str_copy_trunc() reports that an output buffer was too small
 */
int elf_parse_shebang(const char *host_path,
                      char *interp_out,
                      size_t interp_sz,
                      char *arg_out,
                      size_t arg_sz)
{
    int fd = open(host_path, O_RDONLY);
    if (fd < 0) {
        return -errno;
    }

    char buf[512];
    ssize_t nread = read(fd, buf, sizeof(buf) - 1);
    /* Capture errno before close(): close() may overwrite it, and the
     * caller must see the reason read() failed, not an unrelated value.
     */
    int read_errno = errno;
    close(fd);

    if (nread < 0) {
        return -read_errno;
    }
    if (nread < 2 || buf[0] != '#' || buf[1] != '!') {
        return 0; /* Not a shebang script */
    }
    buf[nread] = '\0';

    /* Keep only the first line: cut at the first '\r' or '\n'. */
    char *ptr = buf + 2;
    ptr[strcspn(ptr, "\r\n")] = '\0';

    /* Skip blanks between "#!" and the interpreter path. */
    while (*ptr == ' ' || *ptr == '\t') {
        ptr++;
    }

    /* Strip trailing blanks of the whole line. Line terminators were already
     * removed above, so only space and tab can remain here.
     */
    size_t len = strlen(ptr);
    while (len > 0 && (ptr[len - 1] == ' ' || ptr[len - 1] == '\t')) {
        ptr[--len] = '\0';
    }

    if (len == 0) {
        return -ENOEXEC; /* Empty shebang interpreter */
    }

    /* Split into interpreter path and the single optional argument. */
    char *interp = ptr;
    char *arg = NULL;
    char *space = strpbrk(ptr, " \t");
    if (space) {
        *space = '\0';
        arg = space + 1;
        while (*arg == ' ' || *arg == '\t') {
            arg++;
        }
        /* The line was right-trimmed above, so the argument has no trailing
         * blanks left to remove; only an empty remainder needs handling.
         */
        if (*arg == '\0') {
            arg = NULL;
        }
    }

    if (str_copy_trunc(interp_out, interp, interp_sz) >= interp_sz) {
        return -ENOEXEC; /* Buffer too small */
    }

    if (arg) {
        if (str_copy_trunc(arg_out, arg, arg_sz) >= arg_sz) {
            return -ENOEXEC; /* Buffer too small */
        }
    } else if (arg_out && arg_sz > 0) {
        /* Never write through a zero-sized or missing argument buffer. */
        arg_out[0] = '\0';
    }

    return 1; /* Successfully parsed shebang */
}
15 changes: 15 additions & 0 deletions src/core/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,21 @@ void elf_resolve_interp(const char *sysroot,
char *out,
size_t out_sz);

/* Read and parse a shebang ("#!") script header from host_path.
 *
 * Only the beginning of the file (at most 511 bytes) is read, and only its
 * first line is considered. The interpreter path is written to interp_out
 * and the single optional argument (if present) to arg_out; the argument is
 * the rest of the line with surrounding spaces/tabs removed and is not split
 * further. arg_out is set to an empty string if there is no optional
 * argument.
 *
 * interp_sz and arg_sz are the sizes in bytes of interp_out and arg_out.
 *
 * Returns:
 *   1 if a shebang header was successfully parsed
 *   0 if the file does not start with "#!" (outputs are not written)
 *   Negative errno on failure: the errno of a failed open()/read()
 *   (e.g. -ENOENT), or -ENOEXEC when the interpreter is empty or an output
 *   buffer is too small for the parsed value
 */
int elf_parse_shebang(const char *host_path,
char *interp_out,
size_t interp_sz,
char *arg_out,
size_t arg_sz);

/* Translate ELF program-header flags (PF_R=4, PF_W=2, PF_X=1) into the
* R=1/W=2/X=4 bitset shared by both MEM_PERM_R/W/X (page-table permissions) and
* LINUX_PROT_READ/WRITE/EXEC (mmap prot bits).
Expand Down
123 changes: 116 additions & 7 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,16 +440,125 @@ int main(int argc, char **argv)
}

proc_set_sysroot(sysroot);
if (resolve_guest_elf_host_path(elf_path, elf_host_path,
sizeof(elf_host_path),
&elf_host_temp) < 0) {
log_error("failed to resolve ELF path %s: %s", elf_path,
strerror(errno));

int shebang_depth = 0;
const int max_shebang_depth = 5;

while (shebang_depth < max_shebang_depth) {
if (resolve_guest_elf_host_path(elf_path, elf_host_path,
sizeof(elf_host_path),
&elf_host_temp) < 0) {
log_error("failed to resolve ELF path %s: %s", elf_path,
strerror(errno));
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

/* Check if the file starts with "#!" */
char interp[LINUX_PATH_MAX];
char arg[LINUX_PATH_MAX];
int rc = elf_parse_shebang(elf_host_path, interp, sizeof(interp), arg,
sizeof(arg));
if (rc == 0 || rc == -ENOENT) {
/* Not a shebang script, proceed to boot */
break;
}

if (rc < 0) {
log_error("empty or invalid shebang interpreter in %s", elf_path);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

shebang_depth++;

/* Prepend interpreter (and argument if present) to guest_argv */
bool has_arg = (arg[0] != '\0');
int add_count = has_arg ? 2 : 1;
int new_argc = guest_argc + add_count;
const char **new_argv =
(const char **) calloc((size_t) new_argc, sizeof(char *));
if (!new_argv) {
log_error("out of memory");
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

new_argv[0] = strdup(interp);
if (!new_argv[0]) {
log_error("out of memory");
free((void *) new_argv);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}
if (has_arg) {
new_argv[1] = strdup(arg);
if (!new_argv[1]) {
log_error("out of memory");
free((void *) new_argv[0]);
free((void *) new_argv);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL,
guest_argv, guest_argc, elf_path,
sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}
}
Comment thread
doanbaotrung marked this conversation as resolved.

/* Transfer ownership of the previous guest_argv elements */
for (int i = 0; i < guest_argc; i++) {
new_argv[i + add_count] = guest_argv[i];
}

free((void *) guest_argv);
guest_argv = new_argv;
guest_argc = new_argc;

/* Update elf_path to point to the interpreter path */
char *new_elf_path = strdup(interp);
if (!new_elf_path) {
log_error("out of memory");
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}
free(elf_path);
elf_path = new_elf_path;
Comment thread
doanbaotrung marked this conversation as resolved.

/* Clean up any materialized temp file before resolving the next path */
if (elf_host_temp) {
unlink(elf_host_path);
elf_host_temp = false;
}
}

if (shebang_depth >= max_shebang_depth) {
Comment thread
cubic-dev-ai[bot] marked this conversation as resolved.
log_error("too many levels of shebang recursion (max %d) resolving %s",
max_shebang_depth, argv[arg_start]);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

Expand Down
68 changes: 16 additions & 52 deletions src/syscall/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,69 +332,33 @@ int64_t sys_execve(hv_vcpu_t vcpu,
/* Not a valid ELF. Check if it's a script with a shebang line. Read the
* first 256 bytes and look for "#!" at the start.
*/
int script_fd = open(path_host, O_RDONLY);
if (script_fd < 0) {
err = -LINUX_ENOENT;
char interp_start[256];
char interp_arg[256];
int rc =
elf_parse_shebang(path_host, interp_start, sizeof(interp_start),
interp_arg, sizeof(interp_arg));
if (rc < 0) {
if (rc == -ENOENT) {
err = -LINUX_ENOENT;
} else {
err = -LINUX_ENOEXEC;
}
goto fail;
}
char shebang_buf[256];
ssize_t nread = read(script_fd, shebang_buf, sizeof(shebang_buf) - 1);
close(script_fd);

if (nread < 2 || shebang_buf[0] != '#' || shebang_buf[1] != '!') {
if (rc == 0) {
err = -LINUX_ENOEXEC;
goto fail;
}
shebang_buf[nread] = '\0';

/* Ignore script bytes after the first line; only the shebang line
* contributes interpreter arguments.
*/
char *eol = strchr(shebang_buf + 2, '\n');
if (eol)
*eol = '\0';

/* Parse interpreter path and optional argument. Format: "#!
* /path/to/interpreter [optional-arg]"
*/
char *interp_start = shebang_buf + 2;
while (*interp_start == ' ' || *interp_start == '\t')
interp_start++;
if (*interp_start == '\0') {
err = -LINUX_ENOEXEC;
goto fail;
}

/* Linux preserves one optional shebang argument as a single argv
* element, without shell-style splitting.
*/
char *interp_arg = NULL;
char *space = interp_start;
while (*space && *space != ' ' && *space != '\t')
space++;
if (*space) {
*space = '\0';
interp_arg = space + 1;
while (*interp_arg == ' ' || *interp_arg == '\t')
interp_arg++;
if (*interp_arg == '\0')
interp_arg = NULL;
/* Trim the line ending from the optional argument. */
if (interp_arg) {
char *end = interp_arg + strlen(interp_arg) - 1;
while (end > interp_arg &&
(*end == ' ' || *end == '\t' || *end == '\r'))
*end-- = '\0';
}
}
bool has_arg = (interp_arg[0] != '\0');

log_debug("execve: shebang interp=\"%s\" arg=\"%s\" script=\"%s\"",
interp_start, interp_arg ? interp_arg : "(none)", path);
interp_start, has_arg ? interp_arg : "(none)", path);

/* Rebuild argv: [interpreter, optional-arg, script-path,
* original-argv[1:]]
*/
int new_argc = 1 + (interp_arg ? 1 : 0) + 1 + (argc > 1 ? argc - 1 : 0);
int new_argc = 1 + (has_arg ? 1 : 0) + 1 + (argc > 1 ? argc - 1 : 0);
if (new_argc > MAX_ARGS) {
err = -LINUX_E2BIG;
goto fail;
Expand All @@ -407,7 +371,7 @@ int64_t sys_execve(hv_vcpu_t vcpu,
char *new_argv[MAX_ARGS + 3];
int ni = 0;
new_argv[ni++] = interp_start;
if (interp_arg)
if (has_arg)
new_argv[ni++] = interp_arg;
new_argv[ni++] = path;
for (int i = 1; i < argc; i++)
Expand Down
Loading
Loading