O’Reilly Report What is eBPF? - tutorial 3

tr4ce 2023. 7. 21. 02:02

2023. 7. 21. 02:02

본 게시글은 개인적으로 공부하기 위해 작성된 글입니다.

현재 머신에서는 opensnoop이 실행되고 있지 않다. 이제 bpftool을 활용하여 eBPF 프로그램들이 machine에서 동작하는 것을 관찰해보자.

우선 아래의 명령어를 실행한다.

bpftool prog list

출력 결과는 아래와 같다.

출력 결과에는 위와 같이 cgroup_skb와 cgroup_device, 두 가지 유형의 항목이 표시되어 있어야 한다. 둘 다 Systemd의 구성 요소인 systemd.resource-control에 의해 관리되며, Systemd 유닛(unit)이 네트워크 장치 및 파일 시스템에 액세스하는 것을 제어하는 데 사용된다고 한다.

그리고 현재는 tracepoint 종류의 항목이 없어야 한다.

이번엔 다른 터미널에서 아래의 명령을 수행한 이후에 다시 bpftool prog list를 원래의 터미널에서 실행해본다.

./opensnoop

출력 결과는 아까처럼 82번까지는 동일하게 나오고, 그 이후의 출력 결과가 다르다.

4개의 BPF program이 적재된 것을 확인할 수 있다. 이 4개의 program은 이전 tutorial 2에서 언급한 4가지 opensnoop BPF program에 해당하며, 모두 tracepoint 유형이다. 그러나 이름이 잘려 있어서 entry point와 exit point를 구별할 수 없다.

그리고 두세 개의 숫자로 이루어진 map_ids를 확인할 수 있다. (숫자는 실행할 때마다 다를 수 있다.)

이제 기존에 실행해둔 ./opensnoop을 그대로 두고, 아래의 명령어를 수행한다.

bpftool map list

그리고 출력의 결과로 현재 존재하는 BPF map들을 볼 수 있다.

- 이름이 start인 hash table과 이름이 events인 perf 이벤트 배열을 확인할 수 있다.

그리고 이는 ~/bcc/libbpf-tools/opensnoop.bpf.c의 line 13~24의 source code에 정의되어 있다.

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
// Copyright (c) 2020 Netflix
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "opensnoop.h"

const volatile pid_t targ_pid = 0;	/* pid filter: when non-zero, trace_allowed() rejects any other pid */
const volatile pid_t targ_tgid = 0;	/* tgid filter: when non-zero, trace_allowed() rejects any other tgid */
const volatile uid_t targ_uid = 0;	/* uid filter: applied unless equal to INVALID_UID. NOTE(review): 0 passes valid_uid(), so presumably the loader overwrites this before load -- confirm */
const volatile bool targ_failed = false;	/* when true, trace_exit() emits only calls whose return value is negative */

struct {	/* `start`: hash map written by the sys_enter hooks and read/deleted by trace_exit() */
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 10240);
	__type(key, u32);	/* key: the pid value (low 32 bits of bpf_get_current_pid_tgid()) */
	__type(value, struct args_t);	/* value: the fname pointer and flags saved at syscall entry */
} start SEC(".maps");

struct {	/* `events`: perf event array that trace_exit() sends struct event records to via bpf_perf_event_output() */
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

static __always_inline bool valid_uid(uid_t uid) {	/* true unless uid is the INVALID_UID sentinel */
	return uid != INVALID_UID;
}

static __always_inline	/* returns true when the given tgid/pid (and the current uid) pass the targ_* filters */
bool trace_allowed(u32 tgid, u32 pid)
{
	u32 uid;

	/* filters: a zero targ_tgid/targ_pid disables that check; the uid check is skipped when targ_uid is INVALID_UID */
	if (targ_tgid && targ_tgid != tgid)
		return false;
	if (targ_pid && targ_pid != pid)
		return false;
	if (valid_uid(targ_uid)) {
		uid = (u32)bpf_get_current_uid_gid();	/* the cast keeps only the low 32 bits of the helper's result */
		if (targ_uid != uid) {
			return false;
		}
	}
	return true;	/* every enabled filter matched */
}

SEC("tracepoint/syscalls/sys_enter_open")	/* entry hook: remembers the call's arguments until the matching exit hook runs */
int tracepoint__syscalls__sys_enter_open(struct trace_event_raw_sys_enter* ctx)
{
	u64 id = bpf_get_current_pid_tgid();
	/* use kernel terminology here for tgid/pid: tgid is the upper 32 bits of id, pid the lower 32 */
	u32 tgid = id >> 32;
	u32 pid = id;

	/* store arg info for later lookup: args[0] is kept as fname and args[1] as flags, keyed by pid in `start` */
	if (trace_allowed(tgid, pid)) {
		struct args_t args = {};
		args.fname = (const char *)ctx->args[0];
		args.flags = (int)ctx->args[1];
		bpf_map_update_elem(&start, &pid, &args, 0);
	}
	return 0;
}

/*
 * Entry hook for the openat tracepoint.
 *
 * When the current task passes the targ_* filters, saves the file-name
 * pointer (args[1]) and the flags (args[2]) in the `start` map under the
 * caller's pid so that trace_exit() can pick them up. Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_openat")
int tracepoint__syscalls__sys_enter_openat(struct trace_event_raw_sys_enter* ctx)
{
	u64 pid_tgid = bpf_get_current_pid_tgid();
	/* kernel terminology: tgid is the upper half, pid the lower half */
	u32 pid = pid_tgid;
	u32 tgid = pid_tgid >> 32;

	if (!trace_allowed(tgid, pid))
		return 0;

	/* zero the whole record first, then fill in the saved arguments */
	struct args_t saved = {};
	saved.fname = (const char *)ctx->args[1];
	saved.flags = (int)ctx->args[2];
	bpf_map_update_elem(&start, &pid, &saved, 0);
	return 0;
}

static __always_inline
int trace_exit(struct trace_event_raw_sys_exit* ctx)
{
	struct event event = {};
	struct args_t *ap;
	uintptr_t stack[3];
	int ret;
	u32 pid = bpf_get_current_pid_tgid();

	ap = bpf_map_lookup_elem(&start, &pid);
	if (!ap)
		return 0;	/* missed entry */
	ret = ctx->ret;
	if (targ_failed && ret >= 0)
		goto cleanup;	/* want failed only */

	/* event data */
	event.pid = bpf_get_current_pid_tgid() >> 32;
	event.uid = bpf_get_current_uid_gid();
	bpf_get_current_comm(&event.comm, sizeof(event.comm));
	bpf_probe_read_user_str(&event.fname, sizeof(event.fname), ap->fname);
	event.flags = ap->flags;
	event.ret = ret;

	bpf_get_stack(ctx, &stack, sizeof(stack),
		      BPF_F_USER_STACK);
	/* Skip the first address that is usually the syscall it-self */
	event.callers[0] = stack[1];
	event.callers[1] = stack[2];

	/* emit event */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &event, sizeof(event));

cleanup:
	bpf_map_delete_elem(&start, &pid);
	return 0;
}

/* Exit hook for the open tracepoint; delegates all work to trace_exit(). */
SEC("tracepoint/syscalls/sys_exit_open")
int tracepoint__syscalls__sys_exit_open(struct trace_event_raw_sys_exit* ctx)
{
	return trace_exit(ctx);
}

/* Exit hook for the openat tracepoint; delegates all work to trace_exit(). */
SEC("tracepoint/syscalls/sys_exit_openat")
int tracepoint__syscalls__sys_exit_openat(struct trace_event_raw_sys_exit* ctx)
{
	return trace_exit(ctx);
}

/* License string, emitted into the "license" section of the object. */
char LICENSE[] SEC("license") = "GPL";

- opensnoop의 read-only data를 위한 배열도 있다. (array 이름은 opensnoo.rodata이며, 오타가 아니라 map 이름의 길이 제한 때문에 잘려서 표시된 것이다.)

- 마지막으로 각 행의 시작에 있는 map ID가 앞서 bpftool prog list의 출력(map_ids)에서 참조된 ID와 일치하는지도 대조해 보아야 한다.

이제 program 중 하나의 bytecode를 볼 것이다.

다시 bpftool prog list를 수행해 본다. (사진은 앞서 나온 것을 참고한다.)

각 줄의 시작부분에 ID와 대응되는 BPF program들이 나온다.

ID 중에 tracepoint program의 ID를 참고하여 아래의 명령어를 입력한다.

bpftool prog dump xlated id 110 linum

출력 결과는 아래와 같다. (깔끔하게 보여야 할 것 같아서 code block을 썼다.)

int tracepoint__syscalls__sys_enter_open(struct trace_event_raw_sys_enter * ctx):
; int tracepoint__syscalls__sys_enter_open(struct trace_event_raw_sys_enter* ctx) [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:50 line_col:0]
   0: (bf) r6 = r1
; u64 id = bpf_get_current_pid_tgid(); [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:52 line_col:11]
   1: (85) call bpf_get_current_pid_tgid#186208
; u32 pid = id; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:55 line_col:6]
   2: (63) *(u32 *)(r10 -4) = r0
; if (targ_tgid && targ_tgid != tgid) [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:36 line_col:6]
   3: (18) r1 = map[id:15][0]+4
   5: (61) r2 = *(u32 *)(r1 +0)
; if (targ_pid && targ_pid != pid) [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:38 line_col:6]
   6: (18) r1 = map[id:15][0]+0
   8: (61) r2 = *(u32 *)(r1 +0)
; if (valid_uid(targ_uid)) { [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:40 line_col:16]
   9: (18) r7 = map[id:15][0]+8
  11: (61) r1 = *(u32 *)(r7 +0)
  12: (18) r2 = 0xffffffff
; if (targ_uid != uid) { [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:42 line_col:7]
  14: (b7) r1 = 0
; struct args_t args = {}; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:59 line_col:17]
  15: (7b) *(u64 *)(r10 -16) = r1
; args.fname = (const char *)ctx->args[0]; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:60 line_col:30]
  16: (79) r1 = *(u64 *)(r6 +16)
; args.fname = (const char *)ctx->args[0]; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:60 line_col:14]
  17: (7b) *(u64 *)(r10 -24) = r1
; args.flags = (int)ctx->args[1]; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:61 line_col:21]
  18: (79) r1 = *(u64 *)(r6 +24)
; args.flags = (int)ctx->args[1]; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:61 line_col:14]
  19: (63) *(u32 *)(r10 -16) = r1
  20: (bf) r2 = r10
; struct args_t args = {}; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:59 line_col:17]
  21: (07) r2 += -4
  22: (bf) r3 = r10
  23: (07) r3 += -24
; bpf_map_update_elem(&start, &pid, &args, 0); [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:62 line_col:3]
  24: (18) r1 = map[id:12]
  26: (b7) r4 = 0
  27: (85) call htab_map_update_elem#220464
; return 0; [file:/opt/ebpf/bcc/libbpf-tools/opensnoop.bpf.c line_num:64 line_col:2]
  28: (b7) r0 = 0
  29: (95) exit

이는 source code의 정보를 함께 보여준다. 위에 올려둔 opensnoop.bpf.c와 함께 대조하며 확인해 보면 된다.

'ebpf' 카테고리의 다른 글

eBPF references - Blackhat (0)	2023.08.06
O’Reilly Report What is eBPF? - tutorial 4 (0)	2023.07.21
O’Reilly Report What is eBPF? - tutorial 2 (0)	2023.07.20
O’Reilly Report What is eBPF? - tutorial (0)	2023.07.20
google/ buzzer (eBPF fuzzer toolchain) (0)	2023.07.19

낙서장

O’Reilly Report What is eBPF? - tutorial 3

'ebpf' 카테고리의 다른 글

+ Recent posts

티스토리툴바