Deep Dive into EventPipe: A Linux Kernel-to-User Space Event Notification System
Event Handler Registration
/*
 * Register an event handler.
 *
 * Stamps the handler with the next value of the global next_category_id
 * counter (post-incremented) and appends it to the tail of
 * global_handler_list through its list_node link.
 *
 * handler  handler to register; a NULL pointer is ignored.
 *
 * NOTE(review): neither the counter nor the list is protected here;
 * presumably registration runs single-threaded during start-up — confirm.
 */
void register_event_handler(handler_t *handler) {
    if (handler == NULL) {
        return;
    }

    handler->category = next_category_id++;
    TAILQ_INSERT_TAIL(&global_handler_list, handler, list_node);
}
Event Processing Logic
/*
 * Route one event to the handler registered for its category.
 *
 * evt       event to deliver; evt->category selects the handler.
 * data_len  not used by this function.
 *
 * Returns the value produced by the handler's process_func, or -1 when
 * evt is NULL or no callable handler is found for the category.
 */
static int dispatch_event(event_t *evt, size_t data_len) {
    handler_t *target_handler;

    (void)data_len;

    if (evt == NULL) {
        return -1;
    }

    /*
     * NOTE(review): presumably find_event_handler yields NULL for a category
     * nobody registered — confirm against its definition.
     */
    target_handler = find_event_handler(evt->category);
    if (target_handler == NULL || target_handler->process_func == NULL) {
        return -1;
    }

    return target_handler->process_func(evt, target_handler->private_data);
}
EventPipe Initialization
/*
 * Set up an event pipe: one BPF perf-event-array map plus one event queue
 * per online CPU.
 *
 * pipe_inst           pipe instance to fill in.
 * per_cpu_queue_size  size passed through to each per-CPU queue initialiser.
 *
 * When debug_mode is set, only bpf_map_fd is written (placeholder 0xffff)
 * and nothing else is created.
 *
 * Returns 0 on success. On failure returns a non-zero value: an errno code
 * for failures detected here, or whatever initialize_per_cpu_event_queue
 * returned. If a per-CPU queue fails, the map, the arrays and the queues
 * already built are left in place for the caller to tear down.
 */
int initialize_event_pipe(event_pipe_t *pipe_inst, size_t per_cpu_queue_size) {
    uint32_t cpu_idx;
    long online_cpus;
    int map_fd;
    int error;

    if (debug_mode) {
        pipe_inst->bpf_map_fd = 0xffff;
        return 0;
    }

    /* sysconf reports -1 on failure; never use that as an element count. */
    errno = 0;
    online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
    if (online_cpus < 1) {
        return errno != 0 ? errno : EINVAL;
    }
    pipe_inst->cpu_count = online_cpus;

    /*
     * NOTE(review): assumes bpf_map_create signals failure with a negative
     * return and leaves the cause in errno — confirm against the wrapper.
     */
    map_fd = bpf_map_create(BPF_MAP_TYPE_PERF_EVENT_ARRAY,
                            sizeof(uint32_t), sizeof(int), pipe_inst->cpu_count);
    if (map_fd < 0) {
        error = errno;
        pipe_inst->bpf_map_fd = map_fd;
        return error != 0 ? error : EIO;
    }
    pipe_inst->bpf_map_fd = map_fd;

    pipe_inst->cpu_queues = calloc(pipe_inst->cpu_count, sizeof(*pipe_inst->cpu_queues));
    pipe_inst->poll_fds = calloc(pipe_inst->cpu_count, sizeof(*pipe_inst->poll_fds));
    if (pipe_inst->cpu_queues == NULL || pipe_inst->poll_fds == NULL) {
        /* Nothing else refers to these yet, so release them right here. */
        free(pipe_inst->cpu_queues);
        free(pipe_inst->poll_fds);
        pipe_inst->cpu_queues = NULL;
        pipe_inst->poll_fds = NULL;
        pipe_inst->cpu_count = 0;
        close(pipe_inst->bpf_map_fd);
        pipe_inst->bpf_map_fd = -1;
        return ENOMEM;
    }

    for (cpu_idx = 0; cpu_idx < pipe_inst->cpu_count; cpu_idx++) {
        error = initialize_per_cpu_event_queue(pipe_inst, cpu_idx, per_cpu_queue_size);
        if (error != 0) {
            return error;
        }
    }

    return 0;
}
Per-CPU Event Queue Initialization
/*
 * Create the event queue for one CPU.
 *
 * Opens a software perf event (PERF_COUNT_SW_BPF_OUTPUT, raw samples,
 * wake-up after every event) on cpu_idx, stores its descriptor in the
 * pipe's BPF map under key cpu_idx, maps queue_size plus one page of the
 * event's buffer into this process, and fills in the matching poll slot.
 *
 * pipe_inst   pipe whose cpu_queues[cpu_idx] and poll_fds[cpu_idx] are set.
 * cpu_idx     CPU to attach to; also the key used in the BPF map.
 * queue_size  size of the data area in bytes; one page is added to it
 *             before the mmap call.
 *
 * Returns 0 on success. On failure returns a non-zero value (an errno code,
 * or the value returned by bpf_map_update) and closes the perf descriptor
 * opened here so it is not leaked.
 */
int initialize_per_cpu_event_queue(event_pipe_t *pipe_inst, uint32_t cpu_idx, size_t queue_size) {
    struct perf_event_attr perf_attr = {0};
    struct per_cpu_event_queue *queue = &pipe_inst->cpu_queues[cpu_idx];
    int error;

    /* perf_event_open(2) asks callers to report the attr structure size. */
    perf_attr.size = sizeof(perf_attr);
    perf_attr.type = PERF_TYPE_SOFTWARE;
    perf_attr.config = PERF_COUNT_SW_BPF_OUTPUT;
    perf_attr.sample_type = PERF_SAMPLE_RAW;
    perf_attr.wakeup_events = 1;

    /*
     * NOTE(review): assumes the perf_event_open wrapper returns a negative
     * value and sets errno on failure, like the raw syscall — confirm.
     */
    queue->perf_fd = perf_event_open(&perf_attr, -1, cpu_idx, -1, 0);
    if (queue->perf_fd < 0) {
        error = errno;
        return error != 0 ? error : EIO;
    }

    error = bpf_map_update(pipe_inst->bpf_map_fd, &cpu_idx, &queue->perf_fd, BPF_ANY);
    if (error != 0) {
        close(queue->perf_fd);
        queue->perf_fd = -1;
        return error;
    }

    const long page_size = sysconf(_SC_PAGESIZE);
    queue_size += page_size;

    queue->shared_memory = mmap(NULL, queue_size, PROT_READ | PROT_WRITE, MAP_SHARED, queue->perf_fd, 0);
    if (queue->shared_memory == MAP_FAILED) {
        /* Capture errno first: close() may overwrite it. */
        error = errno;
        /*
         * NOTE(review): the BPF map entry written above still names this
         * event; removing it would need a map-delete helper not visible here.
         */
        close(queue->perf_fd);
        queue->perf_fd = -1;
        return error != 0 ? error : EIO;
    }

    pipe_inst->poll_fds[cpu_idx].fd = queue->perf_fd;
    pipe_inst->poll_fds[cpu_idx].events = POLLIN;

    return 0;
}
Kernel-to-User Space Communication Mechanism
Linux perf events offer a lightweight, kernel-supported channel for BPF programs to push raw data to user space. A BPF_MAP_TYPE_PERF_EVENT_ARRAY map holds one perf event file descriptor per CPU, keyed by CPU index, so that each event is delivered through the buffer of the CPU it was raised on. Events emitted by a BPF program through the bpf_perf_event_output helper are written to these per-CPU shared memory regions. User space waits for POLLIN on the perf file descriptors with poll() and then reads the events directly from the mapped memory, avoiding a separate read system call for each event.