Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new(scap,pman): add new per-CPU driver metrics #1998

Merged
merged 2 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions test/libscap/test_suites/engines/bpf/bpf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,60 @@ TEST(bpf, double_scap_stats_call)
scap_close(h);
}

TEST(bpf, metrics_v2_check_per_CPU_stats)
{
	char error_buffer[FILENAME_MAX] = {0};
	int ret = 0;
	scap_t* h = open_bpf_engine(error_buffer, &ret, 4 * 4096, LIBSCAP_TEST_BPF_PROBE_PATH);
	ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open bpf engine: " << error_buffer << std::endl;

	ssize_t num_possible_CPUs = num_possible_cpus();

	// We want to check our per-CPU counters.
	uint32_t flags = METRICS_V2_KERNEL_COUNTERS;
	uint32_t nstats = 0;
	int32_t rc = 0;
	const metrics_v2* stats_v2 = scap_get_stats_v2(h, flags, &nstats, &rc);
	ASSERT_EQ(rc, SCAP_SUCCESS);
	ASSERT_TRUE(stats_v2);
	ASSERT_GT(nstats, 0);

	uint32_t i = 0;
	ssize_t found = 0;
	char expected_name[METRIC_NAME_MAX] = "";

	while(i < nstats)
	{
		// `sizeof(N_EVENTS_PER_CPU_PREFIX)-1` because we need to exclude the `\0`
		if(strncmp(stats_v2[i].name, N_EVENTS_PER_CPU_PREFIX, sizeof(N_EVENTS_PER_CPU_PREFIX) - 1) == 0)
		{
			// Check the full metric name: CPU metrics must be emitted in
			// increasing CPU-index order. `%zd` is the portable specifier
			// for `ssize_t`.
			snprintf(expected_name, METRIC_NAME_MAX, N_EVENTS_PER_CPU_PREFIX "%zd", found);
			ASSERT_STREQ(stats_v2[i].name, expected_name) << "Wrong events metric name for CPU " << found;
			i++;
			// The next metric should be the number of drops for the same CPU.
			// Guard against reading past the end of the stats array.
			ASSERT_LT(i, nstats) << "Missing CPU drops for CPU " << found;
			if(strncmp(stats_v2[i].name, N_DROPS_PER_CPU_PREFIX, sizeof(N_DROPS_PER_CPU_PREFIX) - 1) == 0)
			{
				snprintf(expected_name, METRIC_NAME_MAX, N_DROPS_PER_CPU_PREFIX "%zd", found);
				ASSERT_STREQ(stats_v2[i].name, expected_name) << "Wrong drops metric name for CPU " << found;
				i++;
				found++;
			}
			else
			{
				FAIL() << "Missing CPU drops for CPU " << found;
			}
		}
		else
		{
			i++;
		}
	}

	// This test could fail in case of rare race conditions in which the number of available CPUs changes
	// between the scap_open and the `num_possible_cpus` function. In CI we shouldn't have hot plugs so probably we
	// can live with this.
	ASSERT_EQ(num_possible_CPUs, found) << "We didn't find the stats for all the CPUs";
	scap_close(h);
}

TEST(bpf, metrics_v2_check_results)
{
char error_buffer[SCAP_LASTERR_SIZE] = {0};
Expand Down
54 changes: 54 additions & 0 deletions test/libscap/test_suites/engines/kmod/kmod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,60 @@ TEST(kmod, double_scap_stats_call)
scap_close(h);
}

TEST(kmod, metrics_v2_check_per_CPU_stats)
{
	char error_buffer[FILENAME_MAX] = {0};
	int ret = 0;
	scap_t* h = open_kmod_engine(error_buffer, &ret, 4 * 4096, LIBSCAP_TEST_KERNEL_MODULE_PATH);
	ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open kmod engine: " << error_buffer << std::endl;

	ssize_t num_online_CPUs = sysconf(_SC_NPROCESSORS_ONLN);

	// We want to check our per-CPU counters.
	uint32_t flags = METRICS_V2_KERNEL_COUNTERS;
	uint32_t nstats = 0;
	int32_t rc = 0;
	const metrics_v2* stats_v2 = scap_get_stats_v2(h, flags, &nstats, &rc);
	ASSERT_EQ(rc, SCAP_SUCCESS);
	ASSERT_TRUE(stats_v2);
	ASSERT_GT(nstats, 0);

	uint32_t i = 0;
	ssize_t found = 0;
	char expected_name[METRIC_NAME_MAX] = "";

	while(i < nstats)
	{
		// `sizeof(N_EVENTS_PER_DEVICE_PREFIX)-1` because we need to exclude the `\0`
		if(strncmp(stats_v2[i].name, N_EVENTS_PER_DEVICE_PREFIX, sizeof(N_EVENTS_PER_DEVICE_PREFIX) - 1) == 0)
		{
			// Check the full metric name: per-device metrics must be emitted
			// in increasing device-index order. `%zd` is the portable
			// specifier for `ssize_t`.
			snprintf(expected_name, METRIC_NAME_MAX, N_EVENTS_PER_DEVICE_PREFIX "%zd", found);
			ASSERT_STREQ(stats_v2[i].name, expected_name) << "Wrong events metric name for device " << found;
			i++;
			// The next metric should be the number of drops for the same device.
			// Guard against reading past the end of the stats array.
			ASSERT_LT(i, nstats) << "Missing CPU drops for CPU " << found;
			if(strncmp(stats_v2[i].name, N_DROPS_PER_DEVICE_PREFIX, sizeof(N_DROPS_PER_DEVICE_PREFIX) - 1) == 0)
			{
				snprintf(expected_name, METRIC_NAME_MAX, N_DROPS_PER_DEVICE_PREFIX "%zd", found);
				ASSERT_STREQ(stats_v2[i].name, expected_name) << "Wrong drops metric name for device " << found;
				i++;
				found++;
			}
			else
			{
				FAIL() << "Missing CPU drops for CPU " << found;
			}
		}
		else
		{
			i++;
		}
	}

	// This test could fail in case of rare race conditions in which the number of online CPUs changes
	// between the scap_open and the `sysconf(_SC_NPROCESSORS_ONLN)` function. In CI we shouldn't have hot plugs so probably we
	// can live with this.
	ASSERT_EQ(num_online_CPUs, found) << "We didn't find the stats for all the CPUs";
	scap_close(h);
}

TEST(kmod, metrics_v2_check_results)
{
char error_buffer[SCAP_LASTERR_SIZE] = {0};
Expand Down
54 changes: 54 additions & 0 deletions test/libscap/test_suites/engines/modern_bpf/modern_bpf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,60 @@ TEST(modern_bpf, double_scap_stats_call)
scap_close(h);
}

TEST(modern_bpf, metrics_v2_check_per_CPU_stats)
{
	char error_buffer[FILENAME_MAX] = {0};
	int ret = 0;
	scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 * 1024 * 1024, 0, false);
	// Use ASSERT_FALSE for consistency with the other engine tests.
	ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one single shared ring buffer: " << error_buffer << std::endl;

	ssize_t num_possible_CPUs = num_possible_cpus();

	// We want to check our per-CPU counters.
	uint32_t flags = METRICS_V2_KERNEL_COUNTERS;
	uint32_t nstats = 0;
	int32_t rc = 0;
	const metrics_v2* stats_v2 = scap_get_stats_v2(h, flags, &nstats, &rc);
	ASSERT_EQ(rc, SCAP_SUCCESS);
	ASSERT_TRUE(stats_v2);
	ASSERT_GT(nstats, 0);

	uint32_t i = 0;
	ssize_t found = 0;
	char expected_name[METRIC_NAME_MAX] = "";

	while(i < nstats)
	{
		// `sizeof(N_EVENTS_PER_CPU_PREFIX)-1` because we need to exclude the `\0`
		if(strncmp(stats_v2[i].name, N_EVENTS_PER_CPU_PREFIX, sizeof(N_EVENTS_PER_CPU_PREFIX) - 1) == 0)
		{
			// Check the full metric name: CPU metrics must be emitted in
			// increasing CPU-index order. `%zd` is the portable specifier
			// for `ssize_t`.
			snprintf(expected_name, METRIC_NAME_MAX, N_EVENTS_PER_CPU_PREFIX "%zd", found);
			ASSERT_STREQ(stats_v2[i].name, expected_name) << "Wrong events metric name for CPU " << found;
			i++;
			// The next metric should be the number of drops for the same CPU.
			// Guard against reading past the end of the stats array.
			ASSERT_LT(i, nstats) << "Missing CPU drops for CPU " << found;
			if(strncmp(stats_v2[i].name, N_DROPS_PER_CPU_PREFIX, sizeof(N_DROPS_PER_CPU_PREFIX) - 1) == 0)
			{
				snprintf(expected_name, METRIC_NAME_MAX, N_DROPS_PER_CPU_PREFIX "%zd", found);
				ASSERT_STREQ(stats_v2[i].name, expected_name) << "Wrong drops metric name for CPU " << found;
				i++;
				found++;
			}
			else
			{
				FAIL() << "Missing CPU drops for CPU " << found;
			}
		}
		else
		{
			i++;
		}
	}

	// This test could fail in case of rare race conditions in which the number of available CPUs changes
	// between the scap_open and the `num_possible_cpus` function. In CI we shouldn't have hot plugs so probably we
	// can live with this.
	ASSERT_EQ(num_possible_CPUs, found) << "We didn't find the stats for all the CPUs";
	scap_close(h);
}

TEST(modern_bpf, metrics_v2_check_results)
{
char error_buffer[FILENAME_MAX] = {0};
Expand Down
8 changes: 7 additions & 1 deletion userspace/libpman/src/configuration.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,14 @@ void pman_clear_state()
g_state.buffer_bytes_dim = 0;
g_state.last_ring_read = -1;
g_state.last_event_size = 0;
g_state.n_attached_progs = 0;

for(int j = 0; j < MODERN_BPF_PROG_ATTACHED_MAX; j++)
{
g_state.attached_progs_fds[j] = -1;
}

g_state.stats = NULL;
g_state.nstats = 0;
g_state.log_fn = NULL;
if(g_state.log_buf)
{
Expand Down
16 changes: 3 additions & 13 deletions userspace/libpman/src/lifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ int pman_open_probe()

static void pman_save_attached_progs()
{
g_state.n_attached_progs = 0;
g_state.attached_progs_fds[0] = bpf_program__fd(g_state.skel->progs.sys_enter);
g_state.attached_progs_fds[1] = bpf_program__fd(g_state.skel->progs.sys_exit);
g_state.attached_progs_fds[2] = bpf_program__fd(g_state.skel->progs.sched_proc_exit);
Expand All @@ -48,18 +47,6 @@ static void pman_save_attached_progs()
g_state.attached_progs_fds[7] = bpf_program__fd(g_state.skel->progs.pf_kernel);
#endif
g_state.attached_progs_fds[8] = bpf_program__fd(g_state.skel->progs.signal_deliver);

for(int j = 0; j < MODERN_BPF_PROG_ATTACHED_MAX; j++)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Andreagit97 mind getting me up to speed wrt the reason for changing the logic above to

for(int j = 0; j < MODERN_BPF_PROG_ATTACHED_MAX; j++)
{
      g_state.attached_progs_fds[j] = -1;
}

and below we have

for(int j = 0; j < MODERN_BPF_PROG_ATTACHED_MAX; j++)
{
	if(g_state.attached_progs_fds[j] != -1)
	{
		nprogs_attached++;
	}
}

Besides this question, LGTM!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ei! The idea here was to move all the logic inside pman_get_metrics_v2 in this way we could avoid creating a global variable g_state.n_attached_progs and just use a local variable. In the end, the only place where we need this information regarding attached_progs is inside pman_get_metrics_v2. Now the modern ebpf and the legacy one do the same loop in the same place so it should be easily to maintain in the future

{
if(g_state.attached_progs_fds[j] < 1)
{
g_state.attached_progs_fds[j] = -1;
}
else
{
g_state.n_attached_progs++;
}
}
}

int pman_load_probe()
Expand All @@ -85,16 +72,19 @@ void pman_close_probe()
if(g_state.stats)
{
free(g_state.stats);
g_state.stats = NULL;
}

if(g_state.cons_pos)
{
free(g_state.cons_pos);
g_state.cons_pos = NULL;
}

if(g_state.prod_pos)
{
free(g_state.prod_pos);
g_state.prod_pos = NULL;
}

if(g_state.skel)
Expand Down
2 changes: 1 addition & 1 deletion userspace/libpman/src/state.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ struct internal_state
/* Stats v2 utilities */
int32_t attached_progs_fds[MODERN_BPF_PROG_ATTACHED_MAX]; /* file descriptors of attached programs, used to
collect stats */
uint16_t n_attached_progs; /* number of attached progs */
struct metrics_v2* stats; /* array of stats collected by libpman */
uint32_t nstats; /* number of stats */
char* log_buf; /* buffer used to store logs before sending them to the log_fn */
size_t log_buf_size; /* size of the log buffer */
falcosecurity_log_fn log_fn;
Expand Down
70 changes: 51 additions & 19 deletions userspace/libpman/src/stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,27 +140,51 @@ int pman_get_scap_stats(struct scap_stats *stats)
return errno;
}

static void set_u64_monotonic_kernel_counter(uint32_t pos, uint64_t val)
{
g_state.stats[pos].type = METRIC_VALUE_TYPE_U64;
g_state.stats[pos].flags = METRICS_V2_KERNEL_COUNTERS;
g_state.stats[pos].unit = METRIC_VALUE_UNIT_COUNT;
g_state.stats[pos].metric_type = METRIC_VALUE_METRIC_TYPE_MONOTONIC;
g_state.stats[pos].value.u64 = val;
}

struct metrics_v2 *pman_get_metrics_v2(uint32_t flags, uint32_t *nstats, int32_t *rc)
{
*rc = SCAP_FAILURE;
/* This is the expected number of stats */
*nstats = (MODERN_BPF_MAX_KERNEL_COUNTERS_STATS + (g_state.n_attached_progs * MODERN_BPF_MAX_LIBBPF_STATS));
/* offset in stats buffer */
int offset = 0;
*nstats = 0;

/* If it is the first time we call this function we populate the stats */
// If it is the first time we call this function we populate the stats
if(g_state.stats == NULL)
{
g_state.stats = (metrics_v2 *)calloc(*nstats, sizeof(metrics_v2));
if(g_state.stats == NULL)
int nprogs_attached = 0;
for(int j = 0; j < MODERN_BPF_PROG_ATTACHED_MAX; j++)
{
if(g_state.attached_progs_fds[j] != -1)
{
nprogs_attached++;
}
}

// At the moment for each available CPU we want:
// - the number of events.
// - the number of drops.
uint32_t per_cpu_stats = g_state.n_possible_cpus* 2;

g_state.nstats = MODERN_BPF_MAX_KERNEL_COUNTERS_STATS + per_cpu_stats + (nprogs_attached * MODERN_BPF_MAX_LIBBPF_STATS);
g_state.stats = (metrics_v2 *)calloc(g_state.nstats, sizeof(metrics_v2));
if(!g_state.stats)
{
g_state.nstats = 0;
pman_print_error("unable to allocate memory for 'metrics_v2' array");
return NULL;
}
}

/* KERNEL COUNTER STATS */
// offset in stats buffer
int offset = 0;

/* KERNEL COUNTER STATS */
if(flags & METRICS_V2_KERNEL_COUNTERS)
{
char error_message[MAX_ERROR_MESSAGE_LEN];
Expand All @@ -173,18 +197,15 @@ struct metrics_v2 *pman_get_metrics_v2(uint32_t flags, uint32_t *nstats, int32_t

for(uint32_t stat = 0; stat < MODERN_BPF_MAX_KERNEL_COUNTERS_STATS; stat++)
{
g_state.stats[stat].type = METRIC_VALUE_TYPE_U64;
g_state.stats[stat].flags = METRICS_V2_KERNEL_COUNTERS;
g_state.stats[stat].unit = METRIC_VALUE_UNIT_COUNT;
g_state.stats[stat].metric_type = METRIC_VALUE_METRIC_TYPE_MONOTONIC;
g_state.stats[stat].value.u64 = 0;
strlcpy(g_state.stats[stat].name, modern_bpf_kernel_counters_stats_names[stat], METRIC_NAME_MAX);
set_u64_monotonic_kernel_counter(stat, 0);
strlcpy(g_state.stats[stat].name, (char*)modern_bpf_kernel_counters_stats_names[stat], METRIC_NAME_MAX);
}

/* We always take statistics from all the CPUs, even if some of them are not online.
* If the CPU is not online the counter map will be empty.
*/
struct counter_map cnt_map;
struct counter_map cnt_map = {};
uint32_t pos = MODERN_BPF_MAX_KERNEL_COUNTERS_STATS;
for(uint32_t index = 0; index < g_state.n_possible_cpus; index++)
{
if(bpf_map_lookup_elem(counter_maps_fd, &index, &cnt_map) < 0)
Expand Down Expand Up @@ -212,8 +233,18 @@ struct metrics_v2 *pman_get_metrics_v2(uint32_t flags, uint32_t *nstats, int32_t
g_state.stats[MODERN_BPF_N_DROPS_BUFFER_PROC_EXIT].value.u64 += cnt_map.n_drops_buffer_proc_exit;
g_state.stats[MODERN_BPF_N_DROPS_SCRATCH_MAP].value.u64 += cnt_map.n_drops_max_event_size;
g_state.stats[MODERN_BPF_N_DROPS].value.u64 += (cnt_map.n_drops_buffer + cnt_map.n_drops_max_event_size);

// We set the num events for that CPU.
set_u64_monotonic_kernel_counter(pos, cnt_map.n_evts);
snprintf(g_state.stats[pos].name, METRIC_NAME_MAX, N_EVENTS_PER_CPU_PREFIX"%d", index);
pos++;

// We set the drops for that CPU.
set_u64_monotonic_kernel_counter(pos, cnt_map.n_drops_buffer + cnt_map.n_drops_max_event_size);
snprintf(g_state.stats[pos].name, METRIC_NAME_MAX, N_DROPS_PER_CPU_PREFIX"%d", index);
pos++;
}
offset = MODERN_BPF_MAX_KERNEL_COUNTERS_STATS;
offset = pos;
}

/* LIBBPF STATS */
Expand All @@ -226,9 +257,10 @@ struct metrics_v2 *pman_get_metrics_v2(uint32_t flags, uint32_t *nstats, int32_t
*/
if((flags & METRICS_V2_LIBBPF_STATS))
{
int fd = 0;
for(int bpf_prog = 0; bpf_prog < MODERN_BPF_PROG_ATTACHED_MAX; bpf_prog++)
{
int fd = g_state.attached_progs_fds[bpf_prog];
fd = g_state.attached_progs_fds[bpf_prog];
if(fd < 0)
{
/* landing here means prog was not attached */
Expand All @@ -244,9 +276,9 @@ struct metrics_v2 *pman_get_metrics_v2(uint32_t flags, uint32_t *nstats, int32_t

for(int stat = 0; stat < MODERN_BPF_MAX_LIBBPF_STATS; stat++)
{
if(offset >= *nstats)
if(offset >= g_state.nstats)
{
/* This should never happen we are reading something wrong */
/* This should never happen, we are doing something wrong */
pman_print_error("no enough space for all the stats");
return NULL;
}
Expand Down
Loading
Loading