From cccaf7f83f6dfd324a300b79f57d0f65efb82749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geyslan=20Greg=C3=B3rio?= Date: Mon, 27 Jan 2025 14:29:22 -0300 Subject: [PATCH] fix(ebpf): treat sched_process_exit corner cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sched_process_exit event may be triggered by a standard exit, such as a syscall, or by alternative kernel paths, making it unsafe to assume that it is always associated with a syscall exit. do_exit and do_group_exit, while typically invoked by the exit and exit_group syscalls, can also be reached through internal kernel mechanisms such as signal handling. A concrete example of this occurs when a syscall returns, enters signal handling, and subsequently calls do_exit after get_signal. Both get_signal and do_exit involve tracepoints. A real execution flow illustrating this scenario in the kernel is as follows: entry_SYSCALL_64 ├── do_syscall_64 ├── syscall_exit_to_user_mode ├── __syscall_exit_to_user_mode_work ├── exit_to_user_mode_prepare ├── exit_to_user_mode_loop ├── arch_do_signal_or_restart ├── get_signal (has signal_deliver tracepoint) ├── do_group_exit └── do_exit (has sched_process_exit tracepoint) --- pkg/ebpf/c/tracee.bpf.c | 18 ++++++++++++++++++ pkg/ebpf/c/vmlinux_missing.h | 3 ++- pkg/ebpf/events_pipeline.go | 3 +-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pkg/ebpf/c/tracee.bpf.c b/pkg/ebpf/c/tracee.bpf.c index 77407bf3285a..031447c5e2e4 100644 --- a/pkg/ebpf/c/tracee.bpf.c +++ b/pkg/ebpf/c/tracee.bpf.c @@ -1521,6 +1521,24 @@ int tracepoint__sched__sched_process_exit(struct bpf_raw_tracepoint_args *ctx) if (!init_program_data(&p, ctx, SCHED_PROCESS_EXIT)) return 0; + // The syscall number cannot be trusted in the following cases: + // + // 1. If the task was terminated due to a signal (PF_SIGNALED is set), the syscall + // context may be inconsistent. + // + // 2. 
If the task was not signaled: + // - A kernel thread (PF_KTHREAD is set) is not expected to have a valid syscall context, so + // the function init_program_data has already set its syscall number as NO_SYSCALL (-1). + // - If PF_KTHREAD is not set but the syscall value is negative, it may be due to + // an invalid or clobbered context. + // + // In any of these cases, we explicitly mark the syscall number as NO_SYSCALL (-1) to avoid + // misinterpretation. + int task_flags = get_task_flags(p.event->task); + if ((task_flags & PF_SIGNALED) || + (!(task_flags & PF_KTHREAD) && (p.event->context.syscall < 0))) + p.event->context.syscall = NO_SYSCALL; + // evaluate matched_policies before removing this pid from the maps evaluate_scope_filters(&p); diff --git a/pkg/ebpf/c/vmlinux_missing.h b/pkg/ebpf/c/vmlinux_missing.h index eb634720b3ff..8c1698485dff 100644 --- a/pkg/ebpf/c/vmlinux_missing.h +++ b/pkg/ebpf/c/vmlinux_missing.h @@ -48,7 +48,8 @@ #define ICMPV6_ECHO_REQUEST 128 -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_SIGNALED 0x00000400 /* Killed by a signal */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define TASK_COMM_LEN 16 diff --git a/pkg/ebpf/events_pipeline.go b/pkg/ebpf/events_pipeline.go index 9be900639bcd..f043b07aa82e 100644 --- a/pkg/ebpf/events_pipeline.go +++ b/pkg/ebpf/events_pipeline.go @@ -221,10 +221,9 @@ func (t *Tracee) decodeEvents(ctx context.Context, sourceChan chan []byte) (<-ch id := events.ID(eCtx.Syscall) syscallDef := events.Core.GetDefinitionByID(id) if syscallDef.NotValid() { - // This should never fail, as the translation used in eBPF relies on the same event definitions commStr := string(eCtx.Comm[:bytes.IndexByte(eCtx.Comm[:], 0)]) utsNameStr := string(eCtx.UtsName[:bytes.IndexByte(eCtx.UtsName[:], 0)]) - logger.Errorw( + logger.Debugw( fmt.Sprintf("Event %s with an invalid syscall id %d", evtName, id), "Comm", commStr, "UtsName", utsNameStr,