From 897447e05a95602132b7b199f9befa38ae084786 Mon Sep 17 00:00:00 2001 From: Roberto Scolaro Date: Mon, 2 Oct 2023 08:46:02 +0200 Subject: [PATCH] fix(driver): null ptr deref on kernel >=6.5 Signed-off-by: Roberto Scolaro --- driver/bpf/fillers.h | 8 +- .../modern_bpf/definitions/struct_flavors.h | 5 + .../helpers/extract/extract_from_kernel.h | 31 ++- .../attached/events/sched_process_exec.bpf.c | 2 +- .../syscall_dispatched_events/execve.bpf.c | 2 +- .../syscall_dispatched_events/execveat.bpf.c | 2 +- driver/ppm_fillers.c | 14 +- .../syscall_exit_suite/execve_x.cpp | 197 ++++++++++++++++++ 8 files changed, 248 insertions(+), 13 deletions(-) diff --git a/driver/bpf/fillers.h b/driver/bpf/fillers.h index 4fdfcfae901..0e91be7e6bc 100644 --- a/driver/bpf/fillers.h +++ b/driver/bpf/fillers.h @@ -2234,12 +2234,16 @@ static __always_inline bool get_exe_upper_layer(struct dentry *dentry, struct su return true; } - struct ovl_entry *oe = (struct ovl_entry*)_READ(dentry->d_fsdata); #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) + struct ovl_entry *oe = (struct ovl_entry*)_READ(dentry->d_fsdata); unsigned long has_upper = (unsigned long)_READ(oe->has_upper); -#else +#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 5, 0) + struct ovl_entry *oe = (struct ovl_entry*)_READ(dentry->d_fsdata); unsigned long flags = _READ(oe->flags); unsigned long has_upper = (flags & (1U << (OVL_E_UPPER_ALIAS))); +#else + unsigned long flags = (unsigned long)_READ(dentry->d_fsdata); + unsigned long has_upper = (flags & (1U << (OVL_E_UPPER_ALIAS))); #endif if(has_upper) diff --git a/driver/modern_bpf/definitions/struct_flavors.h b/driver/modern_bpf/definitions/struct_flavors.h index db512258d53..ee6dc78ffe1 100644 --- a/driver/modern_bpf/definitions/struct_flavors.h +++ b/driver/modern_bpf/definitions/struct_flavors.h @@ -48,6 +48,11 @@ struct inode___v6_6 { struct timespec64 __i_ctime; }; +struct ovl_entry___before_v6_5 +{ + long unsigned int flags; +}; + #ifndef BPF_NO_PRESERVE_ACCESS_INDEX 
#pragma clang attribute pop #endif diff --git a/driver/modern_bpf/helpers/extract/extract_from_kernel.h b/driver/modern_bpf/helpers/extract/extract_from_kernel.h index 62c143e254d..5e68f3d0d77 100644 --- a/driver/modern_bpf/helpers/extract/extract_from_kernel.h +++ b/driver/modern_bpf/helpers/extract/extract_from_kernel.h @@ -799,7 +799,7 @@ static __always_inline void extract__egid(struct task_struct *task, u32 *egid) // EXECVE FLAGS EXTRACTION //////////////////////// -static __always_inline bool extract__exe_upper_layer(struct inode *inode) +static __always_inline bool extract__exe_upper_layer(struct inode *inode, struct file *exe_file) { unsigned long sb_magic = BPF_CORE_READ(inode, i_sb, s_magic); @@ -815,10 +815,37 @@ static __always_inline bool extract__exe_upper_layer(struct inode *inode) bpf_probe_read_kernel(&upper_dentry, sizeof(upper_dentry), vfs_inode + inode_size); - if(upper_dentry) + if(!upper_dentry) + { + return false; + } + + struct dentry *dentry = (struct dentry *)BPF_CORE_READ(exe_file, f_path.dentry); + + unsigned int d_flags = BPF_CORE_READ(dentry, d_flags); + // DCACHE_DISCONNECTED = 0x20 + bool disconnected = (d_flags & 0x20); + if(disconnected) { return true; } + + // In kernels >=6.5 d_fsdata represents an ovl_entry_flag. 
+ unsigned long flags = (unsigned long)BPF_CORE_READ(dentry, d_fsdata); + if(bpf_core_field_exists(((struct ovl_entry___before_v6_5*)0)->flags)) + { + // kernel <6.5 + struct ovl_entry___before_v6_5 *oe = (struct ovl_entry___before_v6_5*)BPF_CORE_READ(dentry, d_fsdata); + flags = (unsigned long)BPF_CORE_READ(oe, flags); + } + + // OVL_E_UPPER_ALIAS = 0 + unsigned long has_upper = (flags & (1U << (0))); + if(has_upper) + { + return true; + } + } return false; diff --git a/driver/modern_bpf/programs/attached/events/sched_process_exec.bpf.c b/driver/modern_bpf/programs/attached/events/sched_process_exec.bpf.c index 0bcbf5dbe30..9e0f707fdbb 100644 --- a/driver/modern_bpf/programs/attached/events/sched_process_exec.bpf.c +++ b/driver/modern_bpf/programs/attached/events/sched_process_exec.bpf.c @@ -184,7 +184,7 @@ int BPF_PROG(t1_sched_p_exec, { flags |= PPM_EXE_WRITABLE; } - if(extract__exe_upper_layer(exe_inode)) + if(extract__exe_upper_layer(exe_inode, exe_file)) { flags |= PPM_EXE_UPPER_LAYER; } diff --git a/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execve.bpf.c b/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execve.bpf.c index f7070e9c701..304bd090e09 100644 --- a/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execve.bpf.c +++ b/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execve.bpf.c @@ -248,7 +248,7 @@ int BPF_PROG(t1_execve_x, { flags |= PPM_EXE_WRITABLE; } - if(extract__exe_upper_layer(exe_inode)) + if(extract__exe_upper_layer(exe_inode, exe_file)) { flags |= PPM_EXE_UPPER_LAYER; } diff --git a/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execveat.bpf.c b/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execveat.bpf.c index f65a4cbd757..eec3b0c75a7 100644 --- a/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execveat.bpf.c +++ 
b/driver/modern_bpf/programs/tail_called/events/syscall_dispatched_events/execveat.bpf.c @@ -265,7 +265,7 @@ int BPF_PROG(t1_execveat_x, { flags |= PPM_EXE_WRITABLE; } - if(extract__exe_upper_layer(exe_inode)) + if(extract__exe_upper_layer(exe_inode, exe_file)) { flags |= PPM_EXE_UPPER_LAYER; } diff --git a/driver/ppm_fillers.c b/driver/ppm_fillers.c index 04ceaa76ab2..a9044263a76 100644 --- a/driver/ppm_fillers.c +++ b/driver/ppm_fillers.c @@ -934,9 +934,9 @@ bool ppm_is_upper_layer(struct file *exe_file){ if(sb) { sb_magic = sb->s_magic; - if(sb_magic == PPM_OVERLAYFS_SUPER_MAGIC) + struct ovl_entry *oe = (struct ovl_entry*)(exe_file->f_path.dentry->d_fsdata); + if(sb_magic == PPM_OVERLAYFS_SUPER_MAGIC && oe) { - struct ovl_entry *oe = (struct ovl_entry*)(exe_file->f_path.dentry->d_fsdata); unsigned long has_upper = 0; #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) if(oe->__upperdentry) @@ -944,22 +944,24 @@ bool ppm_is_upper_layer(struct file *exe_file){ return true; } #else - struct dentry **upper_dentry = NULL; + struct dentry *upper_dentry = NULL; unsigned int d_flags = exe_file->f_path.dentry->d_flags; bool disconnected = (d_flags & DCACHE_DISCONNECTED); // Pointer arithmetics due to unexported ovl_inode struct // warning: this works if and only if the dentry pointer // is placed right after the inode struct - upper_dentry = (struct dentry **)((char *)exe_file->f_path.dentry->d_inode + sizeof(struct inode)); + upper_dentry = (struct dentry *)((char *)exe_file->f_path.dentry->d_inode + sizeof(struct inode)); #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) has_upper = oe->has_upper; -#else +#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 5, 0) has_upper = test_bit(OVL_E_UPPER_ALIAS, &(oe->flags)); +#else + has_upper = test_bit(OVL_E_UPPER_ALIAS, (unsigned long*)&oe); #endif - if(*upper_dentry && (has_upper || disconnected)) + if(upper_dentry && (has_upper || disconnected)) { return true; } diff --git a/test/drivers/test_suites/syscall_exit_suite/execve_x.cpp 
b/test/drivers/test_suites/syscall_exit_suite/execve_x.cpp index 985c00f4bf5..72821fa56ae 100644 --- a/test/drivers/test_suites/syscall_exit_suite/execve_x.cpp +++ b/test/drivers/test_suites/syscall_exit_suite/execve_x.cpp @@ -278,6 +278,203 @@ TEST(SyscallExit, execveX_success) evt_test->assert_num_params_pushed(28); } +TEST(SyscallExit, execveX_not_upperlayer) +{ + auto evt_test = get_syscall_event_test(__NR_execve, EXIT_EVENT); + + evt_test->enable_capture(); + + /*=============================== TRIGGER SYSCALL ===========================*/ + + const char lowerdir[] = "/bin"; + const char upperdir[] = "/tmp/upper"; + const char target[] = "/tmp/merged"; + char tmp_template[] = "/tmp/tmpdir.XXXXXX"; + char *mntopts; + + /* Create a temporary directory for the work layer */ + char *workdir = mkdtemp(tmp_template); + + /* Create the overlay mount target directory */ + mkdir(upperdir, 0777); + mkdir(target, 0777); + + /* Construct the mount options string */ + if(asprintf(&mntopts, "lowerdir=%s,upperdir=%s,workdir=%s", lowerdir, upperdir, workdir) == -1){ + FAIL() << "Cannot construct mount options string"; + }; + + /* Mount the overlayfs */ + if (mount("overlay", target, "overlay", MS_MGC_VAL, mntopts) != 0) + { + FAIL() << "Cannot mount overlay." << std::endl; + } + + /* Copy /bin/true to /tmp/merged/uppertrue in the overlay file system */ + char true_path[1024], upper_exe_path[1024]; + sprintf(true_path, "%s/true", lowerdir); + sprintf(upper_exe_path, "%s/uppertrue", target); + + int true_fd = open(true_path, O_RDONLY); + if (true_fd == -1) + { + FAIL() << "Cannot open /bin/true." << std::endl; + } + + int upper_exe_fd = open(upper_exe_path, O_WRONLY|O_CREAT, 0777); + if (upper_exe_fd == -1) + { + FAIL() << "Cannot open /tmp/merged/uppertrue." 
<< std::endl; + } + + char buf[1024]; + ssize_t bytes_read; + while ((bytes_read = read(true_fd, buf, sizeof(buf))) > 0) + { + if (write(upper_exe_fd, buf, bytes_read) != bytes_read) + { + FAIL() << "Cannot write /tmp/merged/uppertrue." << std::endl; + } + } + + if (bytes_read == -1) + { + FAIL() << "Error copying /bin/true" << std::endl; + } + + if (close(true_fd) == -1) + { + FAIL() << "Error closing /bin/true" << std::endl; + } + + if (close(upper_exe_fd) == -1) + { + FAIL() << "Error closing /tmp/merged/uppertrue" << std::endl; + } + + /* Prepare the execve args */ + const char *pathname = true_path; + const char *comm = "true"; + const char *argv[] = {true_path, "randomarg", NULL}; + const char *envp[] = {"IN_TEST=yes", "3_ARGUMENT=yes", "2_ARGUMENT=no", NULL}; + + /* We need to use `SIGCHLD` otherwise the parent won't receive any signal + * when the child terminates. + */ + struct clone_args cl_args = {0}; + cl_args.exit_signal = SIGCHLD; + pid_t ret_pid = syscall(__NR_clone3, &cl_args, sizeof(cl_args)); + + /* + * Call the `execve` + */ + if(ret_pid == 0) + { + syscall(__NR_execve, pathname, argv, envp); + exit(EXIT_FAILURE); + } + + assert_syscall_state(SYSCALL_SUCCESS, "clone3", ret_pid, NOT_EQUAL, -1); + + /* Catch the child before doing anything else. */ + int status = 0; + int options = 0; + assert_syscall_state(SYSCALL_SUCCESS, "wait4", syscall(__NR_wait4, ret_pid, &status, options, NULL), NOT_EQUAL, -1); + + if(__WEXITSTATUS(status) == EXIT_FAILURE || __WIFSIGNALED(status) != 0) + { + FAIL() << "The child execve failed." << std::endl; + } + + /*=============================== TRIGGER SYSCALL ===========================*/ + + evt_test->disable_capture(); + + /* Unmount the overlay file system */ + if (umount(target)) + { + FAIL() << "Cannot unmount target dir." << std::endl; + } + + /* Remove the upper and work directories */ + rmdir(upperdir); + rmdir(workdir); + rmdir(target); + + /* We search for a child event. 
*/ + evt_test->assert_event_presence(ret_pid); + + if(HasFatalFailure()) + { + return; + } + + evt_test->parse_event(); + + evt_test->assert_header(); + + /*=============================== ASSERT PARAMETERS ===========================*/ + + /* Please note here we cannot assert all the params, we check only the possible ones. */ + + /* Parameter 1: res (type: PT_ERRNO)*/ + evt_test->assert_numeric_param(1, (int64_t)0); + + /* Parameter 2: exe (type: PT_CHARBUF) */ + evt_test->assert_charbuf_param(2, pathname); + + /* Parameter 3: args (type: PT_CHARBUFARRAY) */ + /* Starting from `1` because the first is `exe`. */ + evt_test->assert_charbuf_array_param(3, &argv[1]); + + /* Parameter 4: tid (type: PT_PID) */ + evt_test->assert_numeric_param(4, (int64_t)ret_pid); + + /* Parameter 5: pid (type: PT_PID) */ + /* We are the main thread of the process so it's equal to `tid`. */ + evt_test->assert_numeric_param(5, (int64_t)ret_pid); + + /* Parameter 6: ptid (type: PT_PID) */ + evt_test->assert_numeric_param(6, (int64_t)::getpid()); + + /* Parameter 7: cwd (type: PT_CHARBUF) */ + /* leave the current working directory empty like in the old probe. */ + evt_test->assert_empty_param(7); + + /* Parameter 14: comm (type: PT_CHARBUF) */ + evt_test->assert_charbuf_param(14, comm); + + /* Parameter 15: cgroups (type: PT_CHARBUFARRAY) */ + evt_test->assert_cgroup_param(15); + + /* Parameter 16: env (type: PT_CHARBUFARRAY) */ + evt_test->assert_charbuf_array_param(16, &envp[0]); + + /* PPM_EXE_WRITABLE is set when the user that executed a process can also write to the executable + * file that is used to spawn it or is its owner or otherwise capable. 
+ */ + evt_test->assert_numeric_param(20, (uint32_t)PPM_EXE_WRITABLE, EQUAL); + + /* Parameter 24: exe_file ino (type: PT_UINT64) */ + evt_test->assert_numeric_param(24, (uint64_t)1, GREATER_EQUAL); + + /* Parameter 25: exe_file ctime (last status change time, epoch value in nanoseconds) (type: PT_ABSTIME) */ + evt_test->assert_numeric_param(25, (uint64_t)1000000000000000000, GREATER_EQUAL); + + /* Parameter 26: exe_file mtime (last modification time, epoch value in nanoseconds) (type: PT_ABSTIME) */ + evt_test->assert_numeric_param(26, (uint64_t)1000000000000000000, GREATER_EQUAL); + + /* Parameter 27: euid (type: PT_UID) */ + evt_test->assert_numeric_param(27, (uint32_t)geteuid(), EQUAL); + + /* Parameter 28: trusted_exepath (type: PT_FSPATH) */ + evt_test->assert_charbuf_param(28, "/usr/bin/true"); + + /*=============================== ASSERT PARAMETERS ===========================*/ + + evt_test->assert_num_params_pushed(28); +} + TEST(SyscallExit, execveX_upperlayer_success) { auto evt_test = get_syscall_event_test(__NR_execve, EXIT_EVENT);