O’Reilly Report What is eBPF? - tutorial 2

tr4ce 2023. 7. 20. 23:42

2023. 7. 20. 23:42

본 게시글은 개인적으로 공부하기 위해 작성된 글입니다.

저번과 같은 곳에서 진행된다.

terminal 1에서 readelf 명령으로, 앞서 make로 빌드한 BPF object file 중 하나를 분석한다.

readelf --section-details --headers .output/opensnoop.bpf.o

이 object file은 ELF 형식의 파일이다. ELF(Executable and Linkable Format)는 executable file, object code, shared library, core dump에 대한 공통 표준 파일 형식이다. 또한 x86을 비롯한 여러 architecture의 Unix 계열 시스템에서 binary file의 표준 파일 형식으로 쓰인다.

● 중점적으로 봐야할 사항들은 아래와 같다.

- ELF header의 Machine 항목이 Linux BPF이다. 따라서 이 binary code는 kernel 내의 BPF 가상머신에서 실행된다.

- 이 파일에는 BTF 정보가 포함되어 있다. BTF(BPF Type Format)란 BPF program/map과 관련된 debug 정보를 encoding하는 metadata 형식이다. 이 debug 정보는 map pretty print나 function signature 등에 사용된다.

- section header table에서 .text section header 뒤에 tracepoint로 시작하는 실행 가능한 section header가 4개 있다. 이들은 4개의 BPF program에 해당한다.

이제 이 4개의 program들을 BPF source code에서 찾아볼 것이다.

두번째 탭에 있는 editor를 통해 opensnoop.bpf.c로 접근한다.

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
// Copyright (c) 2020 Netflix
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "opensnoop.h"

/*
 * Filter settings consulted by trace_allowed() and trace_exit().
 * NOTE(review): presumably overwritten by the user-space loader before the
 * object is loaded — confirm against opensnoop.c.
 */
/* If non-zero, only trace the task whose pid (low 32 bits of pid_tgid) matches. */
const volatile pid_t targ_pid = 0;
/* If non-zero, only trace tasks whose tgid (high 32 bits of pid_tgid) matches. */
const volatile pid_t targ_tgid = 0;
/* UID filter; only applied when the value is not INVALID_UID (see valid_uid()). */
const volatile uid_t targ_uid = 0;
/* When true, trace_exit() drops calls whose return value is >= 0. */
const volatile bool targ_failed = false;

/*
 * Hash map of in-flight open()/openat() calls.
 * The sys_enter programs store a struct args_t (filename pointer and flags)
 * keyed by the kernel pid (low 32 bits of bpf_get_current_pid_tgid());
 * trace_exit() looks the entry up and deletes it.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 10240);
	__type(key, u32);		/* kernel pid of the calling task */
	__type(value, struct args_t);	/* arguments captured at syscall entry */
} start SEC(".maps");

/*
 * Perf event array that trace_exit() writes struct event records to
 * via bpf_perf_event_output().
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

/* Tell whether @uid is an actual UID, i.e. anything but the INVALID_UID sentinel. */
static __always_inline
bool valid_uid(uid_t uid)
{
	return !(uid == INVALID_UID);
}

/*
 * Decide whether the current task passes the configured filters.
 *
 * @tgid: tgid of the current task
 * @pid:  kernel pid of the current task
 *
 * Returns true when every active filter (targ_tgid, targ_pid, targ_uid)
 * matches, false as soon as one of them does not.
 */
static __always_inline
bool trace_allowed(u32 tgid, u32 pid)
{
	u32 cur_uid;

	/* A zero target disables the tgid and pid filters respectively. */
	if (targ_tgid != 0 && tgid != targ_tgid)
		return false;

	if (targ_pid != 0 && pid != targ_pid)
		return false;

	/* No UID filter configured: nothing left to reject on. */
	if (!valid_uid(targ_uid))
		return true;

	/* The cast keeps the low 32 bits, which this code treats as the UID. */
	cur_uid = (u32)bpf_get_current_uid_gid();
	return cur_uid == targ_uid;
}

/*
 * Program for the syscalls/sys_enter_open tracepoint.
 * Records the filename pointer (args[0]) and flags (args[1]) of the call in
 * the `start` map so that the exit-side code can pick them up. Always
 * returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_open")
int tracepoint__syscalls__sys_enter_open(struct trace_event_raw_sys_enter *ctx) //here
{
	u64 pid_tgid = bpf_get_current_pid_tgid();
	/* kernel terminology: tgid is the upper half, pid the lower half */
	u32 tgid = pid_tgid >> 32;
	u32 pid = pid_tgid;

	/* filtered out: leave no entry behind */
	if (!trace_allowed(tgid, pid))
		return 0;

	/* remember the arguments, keyed by pid, for the later lookup */
	struct args_t pending = {};
	pending.fname = (const char *)ctx->args[0];
	pending.flags = (int)ctx->args[1];
	bpf_map_update_elem(&start, &pid, &pending, 0);

	return 0;
}

/*
 * Program for the syscalls/sys_enter_openat tracepoint.
 * Records the filename pointer (args[1]) and flags (args[2]) of the call in
 * the `start` map so that the exit-side code can pick them up; args[0] is
 * not used here (presumably openat's dirfd — confirm). Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_openat")
int tracepoint__syscalls__sys_enter_openat(struct trace_event_raw_sys_enter *ctx) //here
{
	u64 pid_tgid = bpf_get_current_pid_tgid();
	/* kernel terminology: tgid is the upper half, pid the lower half */
	u32 tgid = pid_tgid >> 32;
	u32 pid = pid_tgid;

	/* filtered out: leave no entry behind */
	if (!trace_allowed(tgid, pid))
		return 0;

	/* remember the arguments, keyed by pid, for the later lookup */
	struct args_t pending = {};
	pending.fname = (const char *)ctx->args[1];
	pending.flags = (int)ctx->args[2];
	bpf_map_update_elem(&start, &pid, &pending, 0);

	return 0;
}

static __always_inline
int trace_exit(struct trace_event_raw_sys_exit* ctx)
{
	struct event event = {};
	struct args_t *ap;
	uintptr_t stack[3];
	int ret;
	u32 pid = bpf_get_current_pid_tgid();

	ap = bpf_map_lookup_elem(&start, &pid);
	if (!ap)
		return 0;	/* missed entry */
	ret = ctx->ret;
	if (targ_failed && ret >= 0)
		goto cleanup;	/* want failed only */

	/* event data */
	event.pid = bpf_get_current_pid_tgid() >> 32;
	event.uid = bpf_get_current_uid_gid();
	bpf_get_current_comm(&event.comm, sizeof(event.comm));
	bpf_probe_read_user_str(&event.fname, sizeof(event.fname), ap->fname);
	event.flags = ap->flags;
	event.ret = ret;

	bpf_get_stack(ctx, &stack, sizeof(stack),
		      BPF_F_USER_STACK);
	/* Skip the first address that is usually the syscall it-self */
	event.callers[0] = stack[1];
	event.callers[1] = stack[2];

	/* emit event */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &event, sizeof(event));

cleanup:
	bpf_map_delete_elem(&start, &pid);
	return 0;
}

/*
 * Program for the syscalls/sys_exit_open tracepoint.
 * All work is delegated to trace_exit(); its return value is passed through.
 */
SEC("tracepoint/syscalls/sys_exit_open")
int tracepoint__syscalls__sys_exit_open(struct trace_event_raw_sys_exit* ctx) //here
{
	return trace_exit(ctx);
}

/*
 * Program for the syscalls/sys_exit_openat tracepoint.
 * All work is delegated to trace_exit(); its return value is passed through.
 */
SEC("tracepoint/syscalls/sys_exit_openat")
int tracepoint__syscalls__sys_exit_openat(struct trace_event_raw_sys_exit* ctx) //here
{
	return trace_exit(ctx);
}

/* License string, placed in the "license" section of the object file. */
char LICENSE[] SEC("license") = "GPL";

상기의 code내에서 int tracepoint__syscalls ....로 시작하는 function들을 4개 찾을 수 있다. 해당되는 부분에 //here을 통해 표기하였다.

이 함수들 앞에는 각각 readelf가 나열한 실행 가능한 section에 해당하는 SEC() macro가 온다. 이 macro는 코드를 부착(attach)할 eBPF hook을 정의한다(SEC("tracepoint/<category>/<name>")). 본 실습의 sys_enter_open과 sys_enter_openat의 경우, eBPF code는 open 및 openat syscall이 수행될 때마다 호출된다. Tracepoint는 실행 중인 kernel에 코드를 부착하는 데 사용할 수 있는, kernel code 안의 static marker이다. 이러한 tracepoint는 성능 측정 등을 위해 kernel의 다양한 위치에 배치되어 있다.

tracepoint__syscalls__sys_enter_open 및 tracepoint__syscalls__sys_enter_openat 함수는 open()/openat() syscall이 실행될 때마다 실행된다. 이 함수들은 호출의 인자(파일 이름, flags)를 읽어 BPF map(start)에 기록한다. 이후 syscall이 끝날 때 실행되는 sys_exit 쪽 program이 기록된 정보를 꺼내 event로 내보내면, opensnoop.c를 compile한 binary인 사용자 공간 프로그램(USP, user space program)이 이를 읽어 STDOUT으로 출력할 수 있다.

'ebpf' 카테고리의 다른 글

O’Reilly Report What is eBPF? - tutorial 4 (0)	2023.07.21
O’Reilly Report What is eBPF? - tutorial 3 (0)	2023.07.21
O’Reilly Report What is eBPF? - tutorial (0)	2023.07.20
google/ buzzer (eBPF fuzzer toolchain) (0)	2023.07.19
eBPF-fuzzer (0)	2023.07.19

낙서장

O’Reilly Report What is eBPF? - tutorial 2

'ebpf' 카테고리의 다른 글

+ Recent posts

티스토리툴바