从环形缓冲区读取时,BPF尾部程序被移除

huangapple go评论81阅读模式
英文:

BPF tail program removed when reading from ring buffer

问题

我注意到,如果我在尾部程序中向环形缓冲区写入数据,并从用户空间读取环形缓冲区,尾部程序最终会被移除。尾部程序不再显示在bpftool prog中。bpftool map dump name jump_table显示Found 0 elements;它最初有1个元素,即尾部程序。

这个BPF程序由main_prog调用一个尾部程序组成。尾部程序向环形缓冲区写入0。

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Ring buffer map; tail_prog pushes its samples here via bpf_ringbuf_output(). */
struct bpf_map_def SEC("maps") flow_ring_buf = {
    .max_entries = 4096, /* same value as 1 << 12 */
    .type        = BPF_MAP_TYPE_RINGBUF,
};

/* Single-slot program array; main_prog tail-calls whatever is stored at index 0. */
struct bpf_map_def SEC("maps") jump_table = {
    .type        = BPF_MAP_TYPE_PROG_ARRAY,
    .max_entries = 1,
    .key_size    = sizeof(__u32),
    .value_size  = sizeof(__u32),
};

/*
 * XDP entry point: jumps to the program stored in slot 0 of jump_table.
 * Execution only continues past bpf_tail_call() when the jump was not taken;
 * in that case a trace message is emitted and the packet is passed on.
 */
SEC("xdp")
int main_prog(struct xdp_md *ctx)
{
    const __u32 tail_slot = 0;

    bpf_tail_call(ctx, &jump_table, tail_slot);

    /* Only reached when the tail call did not happen. */
    bpf_printk("Tail call failed");
    return XDP_PASS;
}

/*
 * Tail-call target: copies one zero-valued __u32 into flow_ring_buf and lets
 * the packet continue. The result of bpf_ringbuf_output() is not checked.
 */
SEC("xdp_2")
int tail_prog(struct xdp_md *ctx)
{
    __u32 sample = 0;

    bpf_ringbuf_output(&flow_ring_buf, &sample, sizeof(sample), 0);
    return XDP_PASS;
}

/* License string emitted into the "license" ELF section. */
char _license[] SEC("license") = "GPL";

这个Go程序加载程序和映射,并从环形缓冲区中读取数据:

package main

import "C"
import (
	"errors"
	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/ringbuf"
	"github.com/vishvananda/netlink"
	"log"
)

// bpfObjects receives the programs and maps loaded from test.o; each `ebpf`
// tag names the object that LoadAndAssign stores in the field.
type bpfObjects struct {
	// XDP programs.
	MainProg *ebpf.Program `ebpf:"main_prog"`
	TailProg *ebpf.Program `ebpf:"tail_prog"`

	// Maps.
	JumpTable   *ebpf.Map `ebpf:"jump_table"`
	FlowRingBuf *ebpf.Map `ebpf:"flow_ring_buf"`
}

// main loads test.o, installs tail_prog in slot 0 of jump_table, attaches
// main_prog to enp0s8 as an XDP program and then drains flow_ring_buf until
// the reader is closed.
func main() {
	// 0x2 is XDP_FLAGS_SKB_MODE in <linux/if_link.h> (generic/skb-mode XDP).
	const xdpFlags = 0x2

	var objects bpfObjects

	spec, err := ebpf.LoadCollectionSpec("test.o")
	if err != nil {
		log.Fatalln("ebpf.LoadCollectionSpec", err)
	}

	if err := spec.LoadAndAssign(&objects, nil); err != nil {
		log.Fatalln("ebpf.LoadAndAssign", err)
	}

	// Hold on to every program and map until main returns. Nothing below
	// touches objects.TailProg or objects.JumpTable again, so without these
	// deferred calls the garbage collector may finalize them while the read
	// loop is running, which closes their file descriptors. Once user space
	// no longer holds the prog array, the kernel empties it and every tail
	// call starts to fail.
	defer objects.MainProg.Close()
	defer objects.TailProg.Close()
	defer objects.JumpTable.Close()
	defer objects.FlowRingBuf.Close()

	// Update the jump table with the tail prog
	if err = objects.JumpTable.Update(uint32(0), uint32(objects.TailProg.FD()), ebpf.UpdateAny); err != nil {
		log.Fatalln("Update prog_array", err)
	}

	link, err := netlink.LinkByName("enp0s8")
	if err != nil {
		log.Fatalln("netlink.LinkByName", err)
	}

	// Load the program onto the interface
	if err = netlink.LinkSetXdpFdWithFlags(link, objects.MainProg.FD(), xdpFlags); err != nil {
		log.Fatalln("netlink.LinkSetXdpFdWithFlags:", err)
	}

	// A
	// Ring buffer read loop. The reader is only usable if NewReader succeeded,
	// so its error is checked before the first Read.
	reader, err := ringbuf.NewReader(objects.FlowRingBuf)
	if err != nil {
		log.Fatalln("ringbuf.NewReader", err)
	}
	defer reader.Close()

	for {
		if _, err := reader.Read(); err != nil {
			if errors.Is(err, ringbuf.ErrClosed) {
				log.Println("Received signal, exiting..")
				return
			}
			log.Printf("reading from reader: %s", err)
		}
	}
}

当我向接口发送流量时,我遇到了这个问题。reader.Read()从不返回错误,并且返回的Record对象的值为0。由于跳转表为空,尾部调用失败,我在内核日志中看到了bpf_printk的输出。

如果我注释掉A下面的代码,并用无限等待替换它,比如select {},我就不会遇到这个问题。

作为参考,等效的C程序可以正常工作。我正在使用libbpf 0.7:

#include <libbpf.h>
#include <bpf.h>
#include <stdio.h>
#include <net/if.h>

/*
 * Ring buffer sample callback, called for each insert into the ring buffer.
 * The record is ignored and 0 is returned.
 */
static int flow_buf_sample(void *ctx, void *data, size_t len)
{
    (void)ctx;
    (void)data;
    (void)len;

    return 0;
}

/*
 * Loads test.o, stores tail_prog in slot 0 of jump_table, attaches main_prog
 * to the interface named by argv[1] and then polls flow_ring_buf.
 *
 * Returns 1 on any setup failure or when polling reports an error (including
 * interruption by a signal); it does not return otherwise.
 */
int main(int argc, char *argv[]) {
    /* 0x2 is XDP_FLAGS_SKB_MODE in <linux/if_link.h> (generic/skb-mode XDP). */
    enum { XDP_ATTACH_FLAGS = 0x2 };

    struct bpf_object *obj;
    struct bpf_map *jump_table;
    struct bpf_map *flow_ring_buf;
    struct bpf_program *tail_prog;
    struct bpf_program *main_prog;

    int err;

    /* argv[1] is read below, so insist on an interface argument up front. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <interface>\n", argc > 0 ? argv[0] : "loader");
        return 1;
    }

    if ((obj = bpf_object__open_file("test.o", NULL)) == NULL) {
        fprintf(stderr, "Could not open ELF\n");
        return 1;
    }

    if ((err = bpf_object__load(obj)) < 0) {
        fprintf(stderr, "Could not load BPF\n");
        return 1;
    }

    if ((jump_table = bpf_object__find_map_by_name(obj, "jump_table")) == NULL) {
        fprintf(stderr, "Could not find jump_table map\n");
        return 1;
    }

    if ((flow_ring_buf = bpf_object__find_map_by_name(obj, "flow_ring_buf")) == NULL) {
        fprintf(stderr, "Could not find flow_ring_buf_map map\n");
        return 1;
    }

    if ((main_prog = bpf_object__find_program_by_name(obj, "main_prog")) == NULL) {
        fprintf(stderr, "Could not find main_prog\n");
        return 1;
    }

    if ((tail_prog = bpf_object__find_program_by_name(obj, "tail_prog")) == NULL) {
        fprintf(stderr, "Could not find tail_prog\n");
        return 1;
    }

    struct ring_buffer *ring_buffer = ring_buffer__new(bpf_map__fd(flow_ring_buf), flow_buf_sample, NULL, NULL);

    if (ring_buffer == NULL) {
        fprintf(stderr, "failed to create ring buffer\n");
        return 1;
    }

    int index0 = 0;
    int tail_prog_fd = bpf_program__fd(tail_prog);

    /*
     * Attaching main_prog with an empty jump table would make every tail call
     * fail, so a failed update is fatal rather than just reported.
     */
    if ((err = bpf_map_update_elem(bpf_map__fd(jump_table), &index0, &tail_prog_fd, 0)) < 0) {
        fprintf(stderr, "failed update jump_table: %d\n", err);
        return 1;
    }

    int if_index = if_nametoindex(argv[1]);

    if (!if_index) {
        fprintf(stderr, "get if_index from interface name failed\n");
        return 1;
    }

    if ((err = bpf_xdp_attach(if_index, bpf_program__fd(main_prog), XDP_ATTACH_FLAGS, NULL)) != 0) {
        fprintf(stderr, "bpf_xdp_attach failed: %d\n", err);
        return 1;
    }

    /* Block in poll indefinitely; stop on error instead of spinning on it. */
    for (;;) {
        err = ring_buffer__poll(ring_buffer, -1);
        if (err < 0) {
            fprintf(stderr, "ring_buffer__poll failed: %d\n", err);
            return 1;
        }
    }
}
英文:

I observe that if I write to a ring buffer in a tail program and read the ring buffer from user space, the tail program is eventually removed. The tail program no longer shows up in bpftool prog. bpftool map dump name jump_table says it Found 0 elements; it originally had 1 element, the tail program.

This BPF program consists of main_prog calling a tail program. The tail program writes 0 to a ring buffer.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
struct bpf_map_def SEC(&quot;maps&quot;) flow_ring_buf = {
.type = BPF_MAP_TYPE_RINGBUF,
.max_entries = 1&lt;&lt;12
};
struct bpf_map_def SEC(&quot;maps&quot;) jump_table = {
.type = BPF_MAP_TYPE_PROG_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 1,
};
SEC(&quot;xdp&quot;)
int main_prog(struct xdp_md *ctx) {
bpf_tail_call(ctx, &amp;jump_table, 0);
bpf_printk(&quot;Tail call failed&quot;);
return XDP_PASS;
}
SEC(&quot;xdp_2&quot;)
int tail_prog(struct xdp_md *ctx) {
__u32 num = 0;
bpf_ringbuf_output(&amp;flow_ring_buf, &amp;num, sizeof(__u32), 0);
return XDP_PASS;
}
/* License string emitted into the "license" ELF section. */
char _license[] SEC("license") = "GPL";

This Go program loads the programs and map and reads from the ring buffer:

package main
import "C"
import (
	"errors"
	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/ringbuf"
	"github.com/vishvananda/netlink"
	"log"
)
// bpfObjects receives the programs and maps loaded from test.o; each `ebpf`
// tag names the object that LoadAndAssign stores in the field.
type bpfObjects struct {
	MainProg    *ebpf.Program `ebpf:"main_prog"`
	TailProg    *ebpf.Program `ebpf:"tail_prog"`
	JumpTable   *ebpf.Map     `ebpf:"jump_table"`
	FlowRingBuf *ebpf.Map     `ebpf:"flow_ring_buf"`
}
func main() {
var objects bpfObjects
spec, err := ebpf.LoadCollectionSpec(&quot;test.o&quot;)
if err != nil {
log.Fatalln(&quot;ebpf.LoadCollectionSpec&quot;, err)
}
if err := spec.LoadAndAssign(&amp;objects, nil); err != nil {
log.Fatalln(&quot;ebpf.LoadAndAssign&quot;, err)
}
// Update the jump table with the tail prog
if err = objects.JumpTable.Update(uint32(0), uint32(objects.TailProg.FD()), ebpf.UpdateAny); err != nil {
log.Fatalln(&quot;Update prog_array&quot;, err)
}
link, err := netlink.LinkByName(&quot;enp0s8&quot;)
if err != nil {
log.Fatalln(&quot;netlink.LinkByName&quot;, err)
}
// Load the program onto the interface
if err = netlink.LinkSetXdpFdWithFlags(link, objects.MainProg.FD(), 0x2); err != nil {
log.Fatalln(&quot;netlink.LinkSetXdpFdWithFlags:&quot;, err)
}
// A
// Problem doesn&#39;t happen if you comment out code below and replace with select {}
reader, err := ringbuf.NewReader(objects.FlowRingBuf)
for {
_, err := reader.Read()
if err != nil {
if errors.Is(err, ringbuf.ErrClosed) {
log.Println(&quot;Received signal, exiting..&quot;)
return
}
log.Printf(&quot;reading from reader: %s&quot;, err)
continue
}
}
}

I run into the problem when I send traffic to the interface. reader.Read() never returns an error and the returned Record object has 0. Because the jump table is empty, the tail call fails and I see the bpf_printk output in the kernel log.

If I comment out the code below A and replace it with an infinite wait, like select {}, I don't run into the problem.

For reference, the equivalent C program works without issues. I'm using libbpf 0.7:

#include <libbpf.h>
#include <bpf.h>
#include <stdio.h>
#include <net/if.h>
/*
 * Ring buffer sample callback, called for each insert into the ring buffer.
 * The record is ignored and 0 is returned.
 */
static int flow_buf_sample(void *ctx, void *data, size_t len)
{
    (void)ctx;
    (void)data;
    (void)len;

    return 0;
}
/*
 * Loads test.o, stores tail_prog in slot 0 of jump_table, attaches main_prog
 * to the interface named by argv[1] and then polls flow_ring_buf.
 *
 * Returns 1 on any setup failure or when polling reports an error (including
 * interruption by a signal); it does not return otherwise.
 */
int main(int argc, char *argv[]) {
    /* 0x2 is XDP_FLAGS_SKB_MODE in <linux/if_link.h> (generic/skb-mode XDP). */
    enum { XDP_ATTACH_FLAGS = 0x2 };

    struct bpf_object *obj;
    struct bpf_map *jump_table;
    struct bpf_map *flow_ring_buf;
    struct bpf_program *tail_prog;
    struct bpf_program *main_prog;

    int err;

    /* argv[1] is read below, so insist on an interface argument up front. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <interface>\n", argc > 0 ? argv[0] : "loader");
        return 1;
    }

    if ((obj = bpf_object__open_file("test.o", NULL)) == NULL) {
        fprintf(stderr, "Could not open ELF\n");
        return 1;
    }

    if ((err = bpf_object__load(obj)) < 0) {
        fprintf(stderr, "Could not load BPF\n");
        return 1;
    }

    if ((jump_table = bpf_object__find_map_by_name(obj, "jump_table")) == NULL) {
        fprintf(stderr, "Could not find jump_table map\n");
        return 1;
    }

    if ((flow_ring_buf = bpf_object__find_map_by_name(obj, "flow_ring_buf")) == NULL) {
        fprintf(stderr, "Could not find flow_ring_buf_map map\n");
        return 1;
    }

    if ((main_prog = bpf_object__find_program_by_name(obj, "main_prog")) == NULL) {
        fprintf(stderr, "Could not find main_prog\n");
        return 1;
    }

    if ((tail_prog = bpf_object__find_program_by_name(obj, "tail_prog")) == NULL) {
        fprintf(stderr, "Could not find tail_prog\n");
        return 1;
    }

    struct ring_buffer *ring_buffer = ring_buffer__new(bpf_map__fd(flow_ring_buf), flow_buf_sample, NULL, NULL);

    if (ring_buffer == NULL) {
        fprintf(stderr, "failed to create ring buffer\n");
        return 1;
    }

    int index0 = 0;
    int tail_prog_fd = bpf_program__fd(tail_prog);

    /*
     * Attaching main_prog with an empty jump table would make every tail call
     * fail, so a failed update is fatal rather than just reported.
     */
    if ((err = bpf_map_update_elem(bpf_map__fd(jump_table), &index0, &tail_prog_fd, 0)) < 0) {
        fprintf(stderr, "failed update jump_table: %d\n", err);
        return 1;
    }

    int if_index = if_nametoindex(argv[1]);

    if (!if_index) {
        fprintf(stderr, "get if_index from interface name failed\n");
        return 1;
    }

    if ((err = bpf_xdp_attach(if_index, bpf_program__fd(main_prog), XDP_ATTACH_FLAGS, NULL)) != 0) {
        fprintf(stderr, "bpf_xdp_attach failed: %d\n", err);
        return 1;
    }

    /* Block in poll indefinitely; stop on error instead of spinning on it. */
    for (;;) {
        err = ring_buffer__poll(ring_buffer, -1);
        if (err < 0) {
            fprintf(stderr, "ring_buffer__poll failed: %d\n", err);
            return 1;
        }
    }
}

答案1

得分: 1

这是因为在主程序循环运行时,objects.JumpTable被垃圾回收了。当引用被移除时,映射jump_table被取消固定。解决方法是在调用LoadAndAssign()之后使用defer objects.JumpTable.Close()。或者添加以下代码:

// Close closes both programs and both maps held by o, in declaration order.
// Errors returned by the individual Close calls are discarded.
func (o *bpfObjects) Close() {
	closers := []func() error{
		o.MainProg.Close,
		o.TailProg.Close,
		o.JumpTable.Close,
		o.FlowRingBuf.Close,
	}
	for _, closeFn := range closers {
		_ = closeFn()
	}
}

并在调用LoadAndAssign()之后使用defer objects.Close()。这在C版本中不会发生,因为没有垃圾回收机制。

英文:

This is happening because objects.JumpTable is getting garbage collected while the main program loop is running. When the reference is removed, map jump_table is unpinned. The solution is to do defer objects.JumpTable.Close() after calling LoadAndAssign(). Or add this code:

// Close closes both programs and both maps held by o, in declaration order.
// Errors returned by the individual Close calls are discarded.
func (o *bpfObjects) Close() {
	closers := []func() error{
		o.MainProg.Close,
		o.TailProg.Close,
		o.JumpTable.Close,
		o.FlowRingBuf.Close,
	}
	for _, closeFn := range closers {
		_ = closeFn()
	}
}

and call defer objects.Close() after calling LoadAndAssign(). This does not happen in the C version because there is no garbage collection.

huangapple
  • 本文由 发表于 2022年2月17日 22:39:20
  • 转载请务必保留本文链接:https://go.coder-hub.com/71160179.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定