From bc965a309df83f0234bfb81b4965104defdf5919 Mon Sep 17 00:00:00 2001 From: Trung Date: Thu, 25 Jun 2026 20:57:51 +0700 Subject: [PATCH] Dangling Symlink Resolution during Rename/Link: In fs.c, the sys_renameat2 and sys_linkat system calls were using the resolving PATH_TR_NONE translation mode for the source paths. This caused elfuse to follow a symlink in the leaf position to its target, so renaming or hard-linking a dangling symlink during package extraction failed with ENOENT (No such file or directory). Both calls now use PATH_TR_NOFOLLOW for the leaf components (sys_linkat still follows the source leaf when AT_SYMLINK_FOLLOW is set), preventing path resolution from walking past the symlink itself. Missing sync_file_range and syncfs System Calls: We registered and defined SYS_sync_file_range (84) and SYS_syncfs (267) in abi.h and dispatch.tbl, and implemented them in syscall.c as dedicated handlers: sc_sync_file_range, which falls back to fsync(), and sc_syncfs, which falls back to sync(). --- src/syscall/abi.h | 2 + src/syscall/dispatch.tbl | 2 + src/syscall/fs.c | 12 ++- src/syscall/syscall.c | 93 +++++++++++++++++++++++ tests/test-syscall-smoke.c | 147 +++++++++++++++++++++++++++++++++++++ 5 files changed, 252 insertions(+), 4 deletions(-) diff --git a/src/syscall/abi.h b/src/syscall/abi.h index 8154281..fa8c391 100644 --- a/src/syscall/abi.h +++ b/src/syscall/abi.h @@ -75,6 +75,7 @@ #define SYS_sync 81 #define SYS_fsync 82 #define SYS_fdatasync 83 +#define SYS_sync_file_range 84 #define SYS_utimensat 88 #define SYS_exit 93 #define SYS_exit_group 94 @@ -165,6 +166,7 @@ #define SYS_madvise 233 #define SYS_wait4 260 #define SYS_prlimit64 261 +#define SYS_syncfs 267 #define SYS_renameat2 276 #define SYS_getrandom 278 #define SYS_execveat 281 diff --git a/src/syscall/dispatch.tbl b/src/syscall/dispatch.tbl index 3bb0156..76a7392 100644 --- a/src/syscall/dispatch.tbl +++ b/src/syscall/dispatch.tbl @@ -100,6 +100,8 @@ SYS_fremovexattr sc_fremovexattr 1 SYS_sync sc_sync 1 SYS_fsync sc_fsync 1 SYS_fdatasync sc_fdatasync 1 +SYS_sync_file_range sc_sync_file_range 1 +SYS_syncfs sc_syncfs 0 SYS_msync sc_msync 0 SYS_membarrier sc_membarrier 0
diff --git a/src/syscall/fs.c b/src/syscall/fs.c
index 9a43e3e..8cf1b50 100644
--- a/src/syscall/fs.c
+++ b/src/syscall/fs.c
@@ -1651,8 +1651,9 @@ int64_t sys_renameat2(guest_t *g,
     if (sidecar_rc != SIDECAR_NOT_HANDLED)
         return sidecar_rc;
 
-    if (path_translate_at(olddirfd, oldpath, PATH_TR_NONE, &old_tx) < 0 ||
-        path_translate_at(newdirfd, newpath, PATH_TR_CREATE, &new_tx) < 0)
+    if (path_translate_at(olddirfd, oldpath, PATH_TR_NOFOLLOW, &old_tx) < 0 ||
+        path_translate_at(newdirfd, newpath, PATH_TR_CREATE | PATH_TR_NOFOLLOW,
+                          &new_tx) < 0)
         return linux_errno();
     if (old_tx.fuse_path || new_tx.fuse_path)
         return -LINUX_ENOSYS;
@@ -1839,8 +1840,11 @@ int64_t sys_linkat(guest_t *g,
     if (sidecar_rc != SIDECAR_NOT_HANDLED)
         return sidecar_rc;
 
-    if (path_translate_at(olddirfd, oldpath, PATH_TR_NONE, &old_tx) < 0 ||
-        path_translate_at(newdirfd, newpath, PATH_TR_CREATE, &new_tx) < 0)
+    unsigned int old_flags =
+        (flags & LINUX_AT_SYMLINK_FOLLOW) ? PATH_TR_NONE : PATH_TR_NOFOLLOW;
+    if (path_translate_at(olddirfd, oldpath, old_flags, &old_tx) < 0 ||
+        path_translate_at(newdirfd, newpath, PATH_TR_CREATE | PATH_TR_NOFOLLOW,
+                          &new_tx) < 0)
         return linux_errno();
     if (old_tx.fuse_path || new_tx.fuse_path)
         return -LINUX_ENOSYS;
diff --git a/src/syscall/syscall.c b/src/syscall/syscall.c
index 6f97fc0..7117f20 100644
--- a/src/syscall/syscall.c
+++ b/src/syscall/syscall.c
@@ -1236,6 +1236,99 @@ static int64_t sc_fsync_common(guest_t *g,
 #define sc_fsync sc_fsync_common
 #define sc_fdatasync sc_fsync_common
 
+/* sync_file_range(fd=x0, offset=x1, nbytes=x2, flags=x3): emulated with a
+ * whole-file fsync(); returns 0 or a negative Linux errno. */
+static int64_t sc_sync_file_range(guest_t *g,
+                                  uint64_t x0,
+                                  uint64_t x1,
+                                  uint64_t x2,
+                                  uint64_t x3,
+                                  uint64_t x4,
+                                  uint64_t x5,
+                                  bool verbose)
+{
+    (void) g;
+    (void) x4;
+    (void) x5;
+    (void) verbose;
+
+    int fd = (int) x0;
+    int64_t offset = (int64_t) x1;
+    int64_t nbytes = (int64_t) x2;
+    unsigned int flags = (unsigned int) x3;
+
+    if (offset < 0 || nbytes < 0 || INT64_MAX - offset < nbytes)
+        return -LINUX_EINVAL;
+
+    host_fd_ref_t host_ref;
+    int64_t err = host_fd_ref_open_io(fd, &host_ref);
+    if (err < 0)
+        return err;
+
+    /* Validate flags (only 1|2|4) before the file type, as Linux does. */
+    if (flags & ~7u) {
+        host_fd_ref_close(&host_ref);
+        return -LINUX_EINVAL;
+    }
+
+    struct stat st;
+    if (fstat(host_ref.fd, &st) == 0 && !S_ISREG(st.st_mode) &&
+        !S_ISBLK(st.st_mode) && !S_ISDIR(st.st_mode)) {
+        host_fd_ref_close(&host_ref);
+        return -LINUX_ESPIPE;
+    }
+
+    /*
+     * SYNC_FILE_RANGE_WRITE (2) alone only starts asynchronous write-out, so
+     * return 0 without blocking and leave the flush to the host's background
+     * write-back. WAIT_BEFORE (1) and WAIT_AFTER (4) must block until the
+     * writes complete.
+     *
+     * macOS has no call that syncs a byte range of file data without also
+     * writing back metadata, so fall back to fsync(): it flushes the whole
+     * file and its metadata, which is stronger than Linux sync_file_range(2).
+     */
+    int64_t ret = 0;
+    if (flags & (1u | 4u)) {
+        ret = (fsync(host_ref.fd) < 0) ? linux_errno() : 0;
+    }
+
+    host_fd_ref_close(&host_ref);
+    return ret;
+}
+
+/* syncfs(fd=x0): validate the fd, then fall back to a global sync(). */
+static int64_t sc_syncfs(guest_t *g,
+                         uint64_t x0,
+                         uint64_t x1,
+                         uint64_t x2,
+                         uint64_t x3,
+                         uint64_t x4,
+                         uint64_t x5,
+                         bool verbose)
+{
+    (void) g;
+    (void) x1;
+    (void) x2;
+    (void) x3;
+    (void) x4;
+    (void) x5;
+    (void) verbose;
+
+    int fd = (int) x0;
+    host_fd_ref_t host_ref;
+    int64_t err = host_fd_ref_open_io(fd, &host_ref);
+    if (err < 0)
+        return err;
+    host_fd_ref_close(&host_ref);
+
+    /* macOS does not have syncfs. We fall back to sync() which synchronizes
+     * all mounted filesystems, satisfying the filesystem-level consistency
+     * guarantee of syncfs. */
+    sync();
+    return 0;
+}
+
 /* getresuid/getresgid: write emulated real/effective/saved IDs to guest ptrs */
 static int64_t sc_getresid_write(guest_t *g,
                                  uint64_t x0,
diff --git a/tests/test-syscall-smoke.c b/tests/test-syscall-smoke.c
index b232046..b3ec5f1 100644
--- a/tests/test-syscall-smoke.c
+++ b/tests/test-syscall-smoke.c
@@ -66,6 +66,14 @@
 #define SYS_set_tid_address 96
 #endif
 
+#ifndef SYS_sync_file_range
+#define SYS_sync_file_range 84
+#endif
+
+#ifndef SYS_syncfs
+#define SYS_syncfs 267
+#endif
+
 int passes = 0, fails = 0;
 
 extern char **environ;
@@ -884,6 +892,143 @@ static void test_urandom_open_flags(void)
     PASS();
 }
 
+/* Exercises sync_file_range: success, argument validation, EBADF, ESPIPE. */
+static void test_sync_file_range(void)
+{
+    TEST("sync_file_range");
+    char path[] = "/tmp/elfuse-sync-file-range-XXXXXX";
+    int fd = mkstemp(path);
+    if (fd < 0) {
+        FAIL("mkstemp");
+        return;
+    }
+    unlink(path);
+
+    /* Write some data first */
+    const char *msg = "hello sync_file_range";
+    if (write(fd, msg, strlen(msg)) != (ssize_t) strlen(msg)) {
+        FAIL("write");
+        close(fd);
+        return;
+    }
+
+    /* 1. Basic success with all valid flags: SYNC_FILE_RANGE_WAIT_BEFORE |
+     * SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER is 7. */
+    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) 0, 7) != 0) {
+        FAIL("sync_file_range basic");
+        close(fd);
+        return;
+    }
+
+    /* 2. Test invalid flags (only bits 1, 2, 4 are valid) */
+    errno = 0;
+    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) 0, 8) != -1 ||
+        errno != EINVAL) {
+        FAIL("sync_file_range invalid flags");
+        close(fd);
+        return;
+    }
+
+    /* 3. Test invalid offset (offset < 0) */
+    errno = 0;
+    if (syscall(SYS_sync_file_range, fd, (off64_t) -1, (off64_t) 0, 7) != -1 ||
+        errno != EINVAL) {
+        FAIL("sync_file_range negative offset");
+        close(fd);
+        return;
+    }
+
+    /* 4. Test invalid nbytes (nbytes < 0) */
+    errno = 0;
+    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) -1, 7) != -1 ||
+        errno != EINVAL) {
+        FAIL("sync_file_range negative nbytes");
+        close(fd);
+        return;
+    }
+
+    /* 5. Test offset + nbytes overflow */
+    errno = 0;
+    off64_t huge_offset = 0x7fffffffffffffffLL;
+    off64_t huge_nbytes = 1;
+    if (syscall(SYS_sync_file_range, fd, huge_offset, huge_nbytes, 7) != -1 ||
+        errno != EINVAL) {
+        FAIL("sync_file_range offset+nbytes overflow");
+        close(fd);
+        return;
+    }
+
+    /* 6. SYNC_FILE_RANGE_WRITE only (async hint) returns 0 without blocking.
+     * Covers the deliberate-divergence branch. */
+    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) 0, 2) != 0) {
+        FAIL("sync_file_range write-only");
+        close(fd);
+        return;
+    }
+
+    /* 7. Bad fd → EBADF. */
+    errno = 0;
+    if (syscall(SYS_sync_file_range, -1, (off64_t) 0, (off64_t) 0, 7) != -1 ||
+        errno != EBADF) {
+        FAIL("sync_file_range bad fd");
+        close(fd);
+        return;
+    }
+
+    /* 8. Unsupported file type (pipe) → ESPIPE. */
+    int pipefds[2];
+    if (pipe(pipefds) != 0) {
+        FAIL("pipe creation failed");
+        close(fd);
+        return;
+    }
+    errno = 0;
+    long rc = syscall(SYS_sync_file_range, pipefds[0], (off64_t) 0,
+                      (off64_t) 0, 7);
+    int saved_errno = errno; /* close() below may overwrite errno */
+    close(pipefds[0]);
+    close(pipefds[1]);
+    if (rc != -1 || saved_errno != ESPIPE) {
+        FAIL("sync_file_range pipe ESPIPE");
+        close(fd);
+        return;
+    }
+
+    close(fd);
+    PASS();
+}
+
+/* Exercises syncfs: success on a valid fd and EBADF on an invalid one. */
+static void test_syncfs(void)
+{
+    TEST("syncfs");
+    char path[] = "/tmp/elfuse-syncfs-XXXXXX";
+    int fd = mkstemp(path);
+    if (fd < 0) {
+        FAIL("mkstemp");
+        return;
+    }
+    unlink(path);
+
+    /* 1. Test basic success case */
+    if (syscall(SYS_syncfs, fd) != 0) {
+        FAIL("syncfs basic");
+        close(fd);
+        return;
+    }
+
+    /* 2. Test invalid fd case (should return EBADF) */
+    errno = 0;
+    if (syscall(SYS_syncfs, -1) != -1 || errno != EBADF) {
+        FAIL("syncfs invalid fd");
+        close(fd);
+        return;
+    }
+
+    close(fd);
+    PASS();
+}
+
 int main(int argc, char **argv)
 {
     printf("test-syscall-smoke: direct syscall smoke coverage\n\n");
@@ -905,6 +1050,8 @@ int main(int argc, char **argv)
     test_sysv_semaphore_ops();
     test_urandom_byte_reads();
     test_urandom_open_flags();
+    test_sync_file_range();
+    test_syncfs();
 
     SUMMARY("test-syscall-smoke");
     return fails > 0 ? 1 : 0;