diff --git a/.github/scripts/matrix.py b/.github/scripts/matrix.py index dd25c290..b88d2d48 100644 --- a/.github/scripts/matrix.py +++ b/.github/scripts/matrix.py @@ -185,8 +185,8 @@ def tests(self) -> Dict[str, Any]: if self.llvm_version >= 18: tests_list.append("test_progs_cpuv4") - # if self.arch in [Arch.X86_64, Arch.AARCH64] and not self.is_netdev: - # tests_list.append("sched_ext") + if self.arch in [Arch.X86_64, Arch.AARCH64] and not self.is_netdev: + tests_list.append("sched_ext") # Don't run GCC BPF runner, because too many tests are failing # See: https://lore.kernel.org/bpf/87bjw6qpje.fsf@oracle.com/ diff --git a/ci/diffs/20260421-selftests-sched_ext-Fix-flaky-numa-test-by-removing-.patch b/ci/diffs/20260421-selftests-sched_ext-Fix-flaky-numa-test-by-removing-.patch new file mode 100644 index 00000000..b515022c --- /dev/null +++ b/ci/diffs/20260421-selftests-sched_ext-Fix-flaky-numa-test-by-removing-.patch @@ -0,0 +1,64 @@ +From bded7ac722ec1b225eda0e023aee8fa985793be4 Mon Sep 17 00:00:00 2001 +From: Ihor Solodrai +Date: Tue, 21 Apr 2026 09:20:41 -0700 +Subject: [PATCH 1/2] selftests/sched_ext: Fix flaky numa test by removing racy + idle check + +The numa test intermittently fails in CI with: + + scx_bpf_error("CPU 2 should be marked as busy") + +The is_cpu_idle() check in numa_select_cpu is inherently racy: between +scx_bpf_pick_idle_cpu_node() atomically clearing a CPU's idle bit and +the subsequent is_cpu_idle() re-check, the CPU can legitimately +transition back to idle via __scx_update_idle() -> update_builtin_idle(). + +This is a classic TOCTOU race that cannot be fixed without holding a lock +across both operations, which is not possible from BPF context. + +Remove the is_cpu_idle() helper and its call. The remaining NUMA node +membership check (scx_bpf_cpu_node(cpu) != node) is not racy since a +CPU's NUMA node is a static hardware property. + +Fixes: 3034f3b053b5 ("selftests/sched_ext: Add NUMA-aware test") +Signed-off-by: Ihor Solodrai +--- + tools/testing/selftests/sched_ext/numa.bpf.c | 15 --------------- + 1 file changed, 15 deletions(-) + +diff --git a/tools/testing/selftests/sched_ext/numa.bpf.c b/tools/testing/selftests/sched_ext/numa.bpf.c +index 78cc49a7f9a6..89b29acef022 100644 +--- a/tools/testing/selftests/sched_ext/numa.bpf.c ++++ b/tools/testing/selftests/sched_ext/numa.bpf.c +@@ -19,18 +19,6 @@ UEI_DEFINE(uei); + + const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE; + +-static bool is_cpu_idle(s32 cpu, int node) +-{ +- const struct cpumask *idle_cpumask; +- bool idle; +- +- idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node); +- idle = bpf_cpumask_test_cpu(cpu, idle_cpumask); +- scx_bpf_put_cpumask(idle_cpumask); +- +- return idle; +-} +- + s32 BPF_STRUCT_OPS(numa_select_cpu, + struct task_struct *p, s32 prev_cpu, u64 wake_flags) + { +@@ -48,9 +36,6 @@ s32 BPF_STRUCT_OPS(numa_select_cpu, + cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node, + __COMPAT_SCX_PICK_IDLE_IN_NODE); + +- if (is_cpu_idle(cpu, node)) +- scx_bpf_error("CPU %d should be marked as busy", cpu); +- + if (__COMPAT_scx_bpf_cpu_node(cpu) != node) + scx_bpf_error("CPU %d should be in node %d", cpu, node); + +-- +2.53.0 + diff --git a/ci/diffs/20260421-selftests-sched_ext-Fix-rt_stall-flaky-measurement-w.patch b/ci/diffs/20260421-selftests-sched_ext-Fix-rt_stall-flaky-measurement-w.patch new file mode 100644 index 00000000..5dd49bd4 --- /dev/null +++ b/ci/diffs/20260421-selftests-sched_ext-Fix-rt_stall-flaky-measurement-w.patch @@ -0,0 +1,87 @@ +From 0f2eb195e797359c9c6027a27adda606c226490d Mon Sep 17 00:00:00 2001 +From: Ihor Solodrai +Date: Tue, 21 Apr 2026 09:21:25 -0700 +Subject: [PATCH 2/2] selftests/sched_ext: Fix rt_stall flaky measurement + window +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The rt_stall test fails intermittently in CI because it measures total +CPU time since fork rather than the delta during the sleep(RUN_TIME) +measurement window. + +Children start busy-looping immediately after signal_ready(), but the +parent still needs to process both pipe reads before calling sleep(). +During this gap, both children accumulate CPU time — with the RT child +dominating. This inflates the RT denominator so the FAIR/EXT ratio drops +below the 4% threshold. This was observed in CI with the RT task +accumulating 5.69s of CPU time in a 5-second window. + +Fix by taking before/after snapshots of each child's CPU time around the +sleep(RUN_TIME) window and computing deltas. This eliminates the +pre-measurement bias regardless of how long the parent takes between +wait_ready() and sleep(). + +Fixes: 0b82cc331d2e ("selftests/sched_ext: Fix rt_stall flaky failure") +Signed-off-by: Ihor Solodrai +--- + tools/testing/selftests/sched_ext/rt_stall.c | 22 +++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c +index a5041fc2e44f..ad14712e0127 100644 +--- a/tools/testing/selftests/sched_ext/rt_stall.c ++++ b/tools/testing/selftests/sched_ext/rt_stall.c +@@ -148,7 +148,7 @@ static bool sched_stress_test(bool is_ext) + const float expected_min_ratio = 0.04; /* 4% */ + const char *class_str = is_ext ? "EXT" : "FAIR"; + +- float ext_runtime, rt_runtime, actual_ratio; ++ float ext_runtime, rt_runtime, ext_before, rt_before, actual_ratio; + int ext_pid, rt_pid; + int ext_ready[2], rt_ready[2]; + +@@ -202,21 +202,37 @@ static bool sched_stress_test(bool is_ext) + wait_ready(ext_ready[0]); + wait_ready(rt_ready[0]); + ++ /* ++ * Snapshot CPU times before the measurement window. Children ++ * start busy-looping right after signal_ready(), so they may ++ * accumulate CPU time while the parent is still processing ++ * pipe reads. Measuring deltas eliminates this bias. ++ */ ++ ext_before = get_process_runtime(ext_pid); ++ if (ext_before == -1) ++ ksft_exit_fail_msg("Error getting pre-sleep runtime for %s task (PID %d)\n", ++ class_str, ext_pid); ++ rt_before = get_process_runtime(rt_pid); ++ if (rt_before == -1) ++ ksft_exit_fail_msg("Error getting pre-sleep runtime for RT task (PID %d)\n", ++ rt_pid); ++ + /* Let the processes run for the specified time */ + sleep(RUN_TIME); + +- /* Get runtime for the EXT task */ ++ /* Get runtime deltas for the measurement window */ + ext_runtime = get_process_runtime(ext_pid); + if (ext_runtime == -1) + ksft_exit_fail_msg("Error getting runtime for %s task (PID %d)\n", + class_str, ext_pid); ++ ext_runtime -= ext_before; + ksft_print_msg("Runtime of %s task (PID %d) is %f seconds\n", + class_str, ext_pid, ext_runtime); + +- /* Get runtime for the RT task */ + rt_runtime = get_process_runtime(rt_pid); + if (rt_runtime == -1) + ksft_exit_fail_msg("Error getting runtime for RT task (PID %d)\n", rt_pid); ++ rt_runtime -= rt_before; + ksft_print_msg("Runtime of RT task (PID %d) is %f seconds\n", rt_pid, rt_runtime); + + /* Kill the processes */ +-- +2.53.0 +