Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/syscall/abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
#define SYS_sync 81
#define SYS_fsync 82
#define SYS_fdatasync 83
#define SYS_sync_file_range 84
#define SYS_utimensat 88
#define SYS_exit 93
#define SYS_exit_group 94
Expand Down Expand Up @@ -165,6 +166,7 @@
#define SYS_madvise 233
#define SYS_wait4 260
#define SYS_prlimit64 261
#define SYS_syncfs 267
#define SYS_renameat2 276
#define SYS_getrandom 278
#define SYS_execveat 281
Expand Down
2 changes: 2 additions & 0 deletions src/syscall/dispatch.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ SYS_fremovexattr sc_fremovexattr 1
SYS_sync sc_sync 1
SYS_fsync sc_fsync 1
SYS_fdatasync sc_fdatasync 1
SYS_sync_file_range sc_sync_file_range 1
SYS_syncfs sc_syncfs 0
SYS_msync sc_msync 0
SYS_membarrier sc_membarrier 0

Expand Down
12 changes: 8 additions & 4 deletions src/syscall/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1651,8 +1651,9 @@ int64_t sys_renameat2(guest_t *g,
if (sidecar_rc != SIDECAR_NOT_HANDLED)
return sidecar_rc;

if (path_translate_at(olddirfd, oldpath, PATH_TR_NONE, &old_tx) < 0 ||
path_translate_at(newdirfd, newpath, PATH_TR_CREATE, &new_tx) < 0)
if (path_translate_at(olddirfd, oldpath, PATH_TR_NOFOLLOW, &old_tx) < 0 ||
path_translate_at(newdirfd, newpath, PATH_TR_CREATE | PATH_TR_NOFOLLOW,
&new_tx) < 0)
return linux_errno();
if (old_tx.fuse_path || new_tx.fuse_path)
return -LINUX_ENOSYS;
Expand Down Expand Up @@ -1839,8 +1840,11 @@ int64_t sys_linkat(guest_t *g,
if (sidecar_rc != SIDECAR_NOT_HANDLED)
return sidecar_rc;

if (path_translate_at(olddirfd, oldpath, PATH_TR_NONE, &old_tx) < 0 ||
path_translate_at(newdirfd, newpath, PATH_TR_CREATE, &new_tx) < 0)
unsigned int old_flags =
(flags & LINUX_AT_SYMLINK_FOLLOW) ? PATH_TR_NONE : PATH_TR_NOFOLLOW;
if (path_translate_at(olddirfd, oldpath, old_flags, &old_tx) < 0 ||
path_translate_at(newdirfd, newpath, PATH_TR_CREATE | PATH_TR_NOFOLLOW,
&new_tx) < 0)
return linux_errno();
if (old_tx.fuse_path || new_tx.fuse_path)
return -LINUX_ENOSYS;
Expand Down
93 changes: 93 additions & 0 deletions src/syscall/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1236,6 +1236,99 @@ static int64_t sc_fsync_common(guest_t *g,
#define sc_fsync sc_fsync_common
#define sc_fdatasync sc_fsync_common

static int64_t sc_sync_file_range(guest_t *g,
uint64_t x0,
uint64_t x1,
uint64_t x2,
uint64_t x3,
uint64_t x4,
uint64_t x5,
bool verbose)
{
(void) g;
(void) x4;
(void) x5;
(void) verbose;

int fd = (int) x0;
int64_t offset = (int64_t) x1;
int64_t nbytes = (int64_t) x2;
unsigned int flags = (unsigned int) x3;

if (offset < 0 || nbytes < 0 || INT64_MAX - offset < nbytes)
return -LINUX_EINVAL;

host_fd_ref_t host_ref;
int64_t err = host_fd_ref_open_io(fd, &host_ref);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mainline Linux rejects sync_file_range() on a descriptor that is not a
regular file, block device, or directory, returning -ESPIPE.
The current shim does not classify the fd, so calling it on a
pipe / socket / FIFO / char device is not rejected: the call either reaches fsync() or, for a write-only request, returns 0. Linux returns -ESPIPE in both cases.

Suggested fix (after host_fd_ref_open_io succeeds):

struct stat st;
if (fstat(host_ref.fd, &st) == 0 &&
    !S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode) && !S_ISDIR(st.st_mode)) {
    host_fd_ref_close(&host_ref);
    return -LINUX_ESPIPE;
}

if (err < 0)
return err;

struct stat st;
if (fstat(host_ref.fd, &st) == 0 && !S_ISREG(st.st_mode) &&
!S_ISBLK(st.st_mode) && !S_ISDIR(st.st_mode)) {
host_fd_ref_close(&host_ref);
return -LINUX_ESPIPE;
}

/* Validate flags: only bits 1, 2, 4 are valid */
if (flags & ~7u) {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a validity check for offset and nbytes:

if (offset < 0 || nbytes < 0 || offset + nbytes < offset)
    return -LINUX_EINVAL;

host_fd_ref_close(&host_ref);
return -LINUX_EINVAL;
}

/*
* If the flags only ask to initiate asynchronous write-out without waiting
* (i.e. SYNC_FILE_RANGE_WRITE (2)), we return 0 immediately to avoid
* blocking. The host OS's background page-out daemon will handle flushing
* dirty buffers. WAIT_BEFORE (1) and WAIT_AFTER (4) require blocking until
* writes complete.
*
* Note on macOS/Darwin: macOS does not provide a system call equivalent to
* Linux's sync_file_range(2) that can synchronize file data without writing
* back metadata. Therefore, we use fsync() to accomplish the
* synchronization, which will also write back metadata, unlike native Linux
* sync_file_range(2).
*/
int64_t ret = 0;
if (flags & (1u | 4u)) {
ret = (fsync(host_ref.fd) < 0) ? linux_errno() : 0;

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sync_file_range(2) on native Linux does not write back metadata, but macOS does not provide a syscall with that behavior, and fsync() will write back metadata as well.
So please add a comment mentioning this difference in behavior compared to Linux sync_file_range(2).

}

host_fd_ref_close(&host_ref);
return ret;
}

/*
 * syncfs(2) handler.
 *
 * x0 carries the guest file descriptor; every other argument is unused.
 * The descriptor is resolved through host_fd_ref_open_io() purely as a
 * validity check: a negative result from that call is returned unchanged
 * and nothing is flushed.
 *
 * macOS has no syncfs(), so a valid descriptor triggers a plain sync(),
 * which flushes every mounted filesystem rather than only the one the
 * descriptor lives on. That is a superset of the consistency guarantee
 * syncfs gives for a single filesystem.
 *
 * Returns 0 on success, or the negative value produced by
 * host_fd_ref_open_io().
 */
static int64_t sc_syncfs(guest_t *g,
                         uint64_t x0,
                         uint64_t x1,
                         uint64_t x2,
                         uint64_t x3,
                         uint64_t x4,
                         uint64_t x5,
                         bool verbose)
{
    (void) g;
    (void) x1;
    (void) x2;
    (void) x3;
    (void) x4;
    (void) x5;
    (void) verbose;

    host_fd_ref_t probe;
    int64_t rc = host_fd_ref_open_io((int) x0, &probe);
    if (rc >= 0) {
        /* The reference was only needed to prove the descriptor is usable;
         * release it before the global flush. */
        host_fd_ref_close(&probe);
        sync();
        rc = 0;
    }
    return rc;
}

/* getresuid/getresgid: write emulated real/effective/saved IDs to guest ptrs */
static int64_t sc_getresid_write(guest_t *g,
uint64_t x0,
Expand Down
147 changes: 147 additions & 0 deletions tests/test-syscall-smoke.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@
#define SYS_set_tid_address 96
#endif

#ifndef SYS_sync_file_range
#define SYS_sync_file_range 84
#endif

#ifndef SYS_syncfs
#define SYS_syncfs 267
#endif

int passes = 0, fails = 0;
extern char **environ;

Expand Down Expand Up @@ -884,6 +892,143 @@ static void test_urandom_open_flags(void)
PASS();
}

/*
 * Smoke test for sync_file_range(2), issued through raw syscall().
 *
 * Works on an unlinked temporary file and checks, in order:
 *   1. all three valid flag bits together succeed;
 *   2. an unknown flag bit fails with EINVAL;
 *   3. a negative offset fails with EINVAL;
 *   4. a negative nbytes fails with EINVAL;
 *   5. offset + nbytes overflowing a signed 64-bit value fails with EINVAL;
 *   6. a write-only request succeeds;
 *   7. fd -1 fails with EBADF;
 *   8. a pipe descriptor fails with ESPIPE.
 *
 * The test stops at the first failing step, so each run records exactly one
 * FAIL or one PASS. Every exit path closes the descriptors it opened.
 */
static void test_sync_file_range(void)
{
    /* Flag arguments used below. Linux defines SYNC_FILE_RANGE_WAIT_BEFORE
     * as 1, SYNC_FILE_RANGE_WRITE as 2 and SYNC_FILE_RANGE_WAIT_AFTER as 4;
     * local names avoid depending on (or clashing with) libc's macros. */
    enum {
        SFR_WRITE_ONLY = 2,  /* SYNC_FILE_RANGE_WRITE alone */
        SFR_ALL_VALID = 7,   /* WAIT_BEFORE | WRITE | WAIT_AFTER */
        SFR_INVALID_BIT = 8  /* first bit outside the valid mask */
    };

    TEST("sync_file_range");
    char path[] = "/tmp/elfuse-sync-file-range-XXXXXX";
    int fd = mkstemp(path);
    if (fd < 0) {
        FAIL("mkstemp");
        return;
    }
    /* Unlink right away so the file disappears once fd is closed. */
    unlink(path);

    /* Write some data first so there is something to synchronize. */
    const char *msg = "hello sync_file_range";
    size_t msg_len = strlen(msg);
    if (write(fd, msg, msg_len) != (ssize_t) msg_len) {
        FAIL("write");
        close(fd);
        return;
    }

    /* 1. Basic success case with every valid flag set. */
    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) 0,
                SFR_ALL_VALID) != 0) {
        FAIL("sync_file_range basic");
        close(fd);
        return;
    }

    /* 2. Invalid flags (only bits 1, 2, 4 are valid). */
    errno = 0;
    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) 0,
                SFR_INVALID_BIT) != -1 ||
        errno != EINVAL) {
        FAIL("sync_file_range invalid flags");
        close(fd);
        return;
    }

    /* 3. Invalid offset (offset < 0). */
    errno = 0;
    if (syscall(SYS_sync_file_range, fd, (off64_t) -1, (off64_t) 0,
                SFR_ALL_VALID) != -1 ||
        errno != EINVAL) {
        FAIL("sync_file_range negative offset");
        close(fd);
        return;
    }

    /* 4. Invalid nbytes (nbytes < 0). */
    errno = 0;
    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) -1,
                SFR_ALL_VALID) != -1 ||
        errno != EINVAL) {
        FAIL("sync_file_range negative nbytes");
        close(fd);
        return;
    }

    /* 5. offset + nbytes overflows a signed 64-bit value. */
    errno = 0;
    off64_t huge_offset = 0x7fffffffffffffffLL;
    off64_t huge_nbytes = 1;
    if (syscall(SYS_sync_file_range, fd, huge_offset, huge_nbytes,
                SFR_ALL_VALID) != -1 ||
        errno != EINVAL) {
        FAIL("sync_file_range offset+nbytes overflow");
        close(fd);
        return;
    }

    /* 6. SYNC_FILE_RANGE_WRITE only (async hint) returns 0 without blocking.
     * Covers the deliberate-divergence branch. */
    if (syscall(SYS_sync_file_range, fd, (off64_t) 0, (off64_t) 0,
                SFR_WRITE_ONLY) != 0) {
        FAIL("sync_file_range write-only");
        close(fd);
        return;
    }

    /* 7. Bad fd -> EBADF. */
    errno = 0;
    if (syscall(SYS_sync_file_range, -1, (off64_t) 0, (off64_t) 0,
                SFR_ALL_VALID) != -1 ||
        errno != EBADF) {
        FAIL("sync_file_range bad fd");
        close(fd);
        return;
    }

    /* 8. Unsupported file type (pipe) -> ESPIPE. */
    int pipefds[2];
    if (pipe(pipefds) != 0) {
        /* Bail out here: falling through would record a PASS on top of
         * this FAIL for the same test. */
        FAIL("pipe creation failed");
        close(fd);
        return;
    }
    errno = 0;
    long pipe_rc = syscall(SYS_sync_file_range, pipefds[0], (off64_t) 0,
                           (off64_t) 0, SFR_ALL_VALID);
    /* Capture errno before close() gets a chance to overwrite it. */
    int pipe_errno = errno;
    close(pipefds[0]);
    close(pipefds[1]);
    if (pipe_rc != -1 || pipe_errno != ESPIPE) {
        FAIL("sync_file_range pipe ESPIPE");
        close(fd);
        return;
    }

    close(fd);
    PASS();
}

/*
 * Smoke test for syncfs(2), issued through raw syscall().
 *
 * Checks that the call returns 0 for a descriptor of an unlinked temporary
 * file, and that fd -1 fails with errno set to EBADF. Stops at the first
 * failing step; the temporary descriptor is closed on every path after it
 * has been opened.
 */
static void test_syncfs(void)
{
    TEST("syncfs");

    char tmpl[] = "/tmp/elfuse-syncfs-XXXXXX";
    int tmpfd = mkstemp(tmpl);
    if (tmpfd < 0) {
        FAIL("mkstemp");
        return;
    }
    /* The name is not needed once the descriptor is open. */
    unlink(tmpl);

    /* Step 1: a valid descriptor must succeed. */
    long rc = syscall(SYS_syncfs, tmpfd);
    if (rc != 0) {
        FAIL("syncfs basic");
        close(tmpfd);
        return;
    }

    /* Step 2: an invalid descriptor must be rejected with EBADF. */
    errno = 0;
    rc = syscall(SYS_syncfs, -1);
    if (!(rc == -1 && errno == EBADF)) {
        FAIL("syncfs invalid fd");
        close(tmpfd);
        return;
    }

    close(tmpfd);
    PASS();
}

int main(int argc, char **argv)
{
printf("test-syscall-smoke: direct syscall smoke coverage\n\n");
Expand All @@ -905,6 +1050,8 @@ int main(int argc, char **argv)
test_sysv_semaphore_ops();
test_urandom_byte_reads();
test_urandom_open_flags();
test_sync_file_range();
test_syncfs();

SUMMARY("test-syscall-smoke");
return fails > 0 ? 1 : 0;
Expand Down
Loading