英文:
BPF tail program removed when reading from ring buffer
问题
我注意到,如果我在尾部程序中向环形缓冲区写入数据,并从用户空间读取环形缓冲区,尾部程序最终会被移除。尾部程序不再显示在bpftool prog
中。bpftool map dump name jump_table
显示Found 0 elements
;它最初有1个元素,即尾部程序。
这个BPF程序由main_prog
调用一个尾部程序组成。尾部程序向环形缓冲区写入0
。
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
struct bpf_map_def SEC("maps") flow_ring_buf = {
.type = BPF_MAP_TYPE_RINGBUF,
.max_entries = 1<<12
};
struct bpf_map_def SEC("maps") jump_table = {
.type = BPF_MAP_TYPE_PROG_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 1,
};
SEC("xdp")
int main_prog(struct xdp_md *ctx) {
bpf_tail_call(ctx, &jump_table, 0);
bpf_printk("Tail call failed");
return XDP_PASS;
}
SEC("xdp_2")
int tail_prog(struct xdp_md *ctx) {
__u32 num = 0;
bpf_ringbuf_output(&flow_ring_buf, &num, sizeof(__u32), 0);
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
这个Go程序加载程序和映射,并从环形缓冲区中读取数据:
package main
import "C"
import (
"errors"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/ringbuf"
"github.com/vishvananda/netlink"
"log"
)
type bpfObjects struct {
MainProg *ebpf.Program `ebpf:"main_prog"`
TailProg *ebpf.Program `ebpf:"tail_prog"`
JumpTable *ebpf.Map `ebpf:"jump_table"`
FlowRingBuf *ebpf.Map `ebpf:"flow_ring_buf"`
}
func main() {
var objects bpfObjects
spec, err := ebpf.LoadCollectionSpec("test.o")
if err != nil {
log.Fatalln("ebpf.LoadCollectionSpec", err)
}
if err := spec.LoadAndAssign(&objects, nil); err != nil {
log.Fatalln("ebpf.LoadAndAssign", err)
}
// Update the jump table with the tail prog
if err = objects.JumpTable.Update(uint32(0), uint32(objects.TailProg.FD()), ebpf.UpdateAny); err != nil {
log.Fatalln("Update prog_array", err)
}
link, err := netlink.LinkByName("enp0s8")
if err != nil {
log.Fatalln("netlink.LinkByName", err)
}
// Load the program onto the interface
if err = netlink.LinkSetXdpFdWithFlags(link, objects.MainProg.FD(), 0x2); err != nil {
log.Fatalln("netlink.LinkSetXdpFdWithFlags:", err)
}
// A
// Problem doesn't happen if you comment out code below and replace with select {}
reader, err := ringbuf.NewReader(objects.FlowRingBuf)
for {
_, err := reader.Read()
if err != nil {
if errors.Is(err, ringbuf.ErrClosed) {
log.Println("Received signal, exiting..")
return
}
log.Printf("reading from reader: %s", err)
continue
}
}
}
当我向接口发送流量时,我遇到了这个问题。reader.Read()
从不返回错误,并且返回的Record
对象的值为0
。由于跳转表为空,尾部调用失败,我在内核日志中看到了bpf_printk
的输出。
如果我注释掉A
下面的代码,并用无限等待替换它,比如select {}
,我就不会遇到这个问题。
作为参考,等效的C程序可以正常工作。我正在使用libbpf 0.7:
#include <libbpf.h>
#include <bpf.h>
#include <stdio.h>
#include <net/if.h>
// Handles each insert into ring buffer
static int flow_buf_sample(void *ctx, void *data, size_t len) {
return 0;
}
int main(int argc, char *argv[]) {
struct bpf_object *obj;
struct bpf_map *jump_table;
struct bpf_map *flow_ring_buf;
struct bpf_program *tail_prog;
struct bpf_program *main_prog;
int err;
if ((obj = bpf_object__open_file("test.o", NULL)) == NULL) {
fprintf(stderr, "Could not open ELF");
return 1;
}
if ((err = bpf_object__load(obj)) < 0) {
fprintf(stderr, "Could not load BPF");
return 1;
}
if ((jump_table = bpf_object__find_map_by_name(obj, "jump_table")) == NULL) {
fprintf(stderr, "Could not find jump_table map");
return 1;
}
if ((flow_ring_buf = bpf_object__find_map_by_name(obj, "flow_ring_buf")) == NULL) {
fprintf(stderr, "Could not find flow_ring_buf_map map");
return 1;
}
if ((main_prog = bpf_object__find_program_by_name(obj, "main_prog")) == NULL) {
fprintf(stderr, "Could not find main_prog");
return 1;
}
if ((tail_prog = bpf_object__find_program_by_name(obj, "tail_prog")) == NULL) {
fprintf(stderr, "Could not find tail_prog");
return 1;
}
struct ring_buffer *ring_buffer = ring_buffer__new(bpf_map__fd(flow_ring_buf), flow_buf_sample, NULL, NULL);
if (ring_buffer == NULL) {
fprintf(stderr, "failed to create ring buffer\n");
return 1;
}
int index0 = 0;
int tail_prog_fd = bpf_program__fd(tail_prog);
if ((err = bpf_map_update_elem(bpf_map__fd(jump_table), &index0, &tail_prog_fd, 0)) < 0) {
fprintf(stderr, "failed update jump_table: %d", err);
}
int if_index = if_nametoindex(argv[1]);
if (!if_index) {
printf("get if_index from interface name failed\n");
return 1;
}
if ((err = bpf_xdp_attach(if_index, bpf_program__fd(main_prog), 0x2, NULL)) != 0) {
fprintf(stderr, "bpf_xdp_attach failed: %d", err);
return 1;
}
while(1) {
ring_buffer__poll(ring_buffer, -1);
}
return 0;
}
英文:
I observe that if I write to a ring buffer in a tail program and read the ring buffer from user space, the tail program is eventually removed. The tail program no longer shows up in bpftool prog
. bpftool map dump name jump_table
says it Found 0 elements
; it originally had 1 element, the tail program.
This BPF program consists of main_prog
calling a tail program. The tail program writes 0
to a ring buffer.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
struct bpf_map_def SEC("maps") flow_ring_buf = {
.type = BPF_MAP_TYPE_RINGBUF,
.max_entries = 1<<12
};
struct bpf_map_def SEC("maps") jump_table = {
.type = BPF_MAP_TYPE_PROG_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 1,
};
SEC("xdp")
int main_prog(struct xdp_md *ctx) {
bpf_tail_call(ctx, &jump_table, 0);
bpf_printk("Tail call failed");
return XDP_PASS;
}
SEC("xdp_2")
int tail_prog(struct xdp_md *ctx) {
__u32 num = 0;
bpf_ringbuf_output(&flow_ring_buf, &num, sizeof(__u32), 0);
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
This Go program loads the programs and map and reads from the ring buffer:
package main
import "C"
import (
"errors"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/ringbuf"
"github.com/vishvananda/netlink"
"log"
)
type bpfObjects struct {
MainProg *ebpf.Program `ebpf:"main_prog"`
TailProg *ebpf.Program `ebpf:"tail_prog"`
JumpTable *ebpf.Map `ebpf:"jump_table"`
FlowRingBuf *ebpf.Map `ebpf:"flow_ring_buf"`
}
func main() {
var objects bpfObjects
spec, err := ebpf.LoadCollectionSpec("test.o")
if err != nil {
log.Fatalln("ebpf.LoadCollectionSpec", err)
}
if err := spec.LoadAndAssign(&objects, nil); err != nil {
log.Fatalln("ebpf.LoadAndAssign", err)
}
// Update the jump table with the tail prog
if err = objects.JumpTable.Update(uint32(0), uint32(objects.TailProg.FD()), ebpf.UpdateAny); err != nil {
log.Fatalln("Update prog_array", err)
}
link, err := netlink.LinkByName("enp0s8")
if err != nil {
log.Fatalln("netlink.LinkByName", err)
}
// Load the program onto the interface
if err = netlink.LinkSetXdpFdWithFlags(link, objects.MainProg.FD(), 0x2); err != nil {
log.Fatalln("netlink.LinkSetXdpFdWithFlags:", err)
}
// A
// Problem doesn't happen if you comment out code below and replace with select {}
reader, err := ringbuf.NewReader(objects.FlowRingBuf)
for {
_, err := reader.Read()
if err != nil {
if errors.Is(err, ringbuf.ErrClosed) {
log.Println("Received signal, exiting..")
return
}
log.Printf("reading from reader: %s", err)
continue
}
}
}
I run into the problem when I send traffic to the interface. reader.Read()
never returns an error and the returned Record
object has 0
. Because the jump table is empty, the tail call fails and I see the bpf_printk
output in the kernel log.
If comment out the code below A
and replace it with an infinite wait, like select {}
, I don't run into the problem.
For reference, the equivalent C program works without issues. I'm using libbpf 0.7:
#include <libbpf.h>
#include <bpf.h>
#include <stdio.h>
#include <net/if.h>
// Handles each insert into ring buffer
static int flow_buf_sample(void *ctx, void *data, size_t len) {
return 0;
}
int main(int argc, char *argv[]) {
struct bpf_object *obj;
struct bpf_map *jump_table;
struct bpf_map *flow_ring_buf;
struct bpf_program *tail_prog;
struct bpf_program *main_prog;
int err;
if ((obj = bpf_object__open_file("test.o", NULL)) == NULL) {
fprintf(stderr, "Could not open ELF");
return 1;
}
if ((err = bpf_object__load(obj)) < 0) {
fprintf(stderr, "Could not load BPF");
return 1;
}
if ((jump_table = bpf_object__find_map_by_name(obj, "jump_table")) == NULL) {
fprintf(stderr, "Could not find jump_table map");
return 1;
}
if ((flow_ring_buf = bpf_object__find_map_by_name(obj, "flow_ring_buf")) == NULL) {
fprintf(stderr, "Could not find flow_ring_buf_map map");
return 1;
}
if ((main_prog = bpf_object__find_program_by_name(obj, "main_prog")) == NULL) {
fprintf(stderr, "Could not find main_prog");
return 1;
}
if ((tail_prog = bpf_object__find_program_by_name(obj, "tail_prog")) == NULL) {
fprintf(stderr, "Could not find tail_prog");
return 1;
}
struct ring_buffer *ring_buffer = ring_buffer__new(bpf_map__fd(flow_ring_buf), flow_buf_sample, NULL, NULL);
if (ring_buffer == NULL) {
fprintf(stderr, "failed to create ring buffer\n");
return 1;
}
int index0 = 0;
int tail_prog_fd = bpf_program__fd(tail_prog);
if ((err = bpf_map_update_elem(bpf_map__fd(jump_table), &index0, &tail_prog_fd, 0)) < 0) {
fprintf(stderr, "failed update jump_table: %d", err);
}
int if_index = if_nametoindex(argv[1]);
if (!if_index) {
printf("get if_index from interface name failed\n");
return 1;
}
if ((err = bpf_xdp_attach(if_index, bpf_program__fd(main_prog), 0x2, NULL)) != 0) {
fprintf(stderr, "bpf_xdp_attach failed: %d", err);
return 1;
}
while(1) {
ring_buffer__poll(ring_buffer, -1);
}
return 0;
}
答案1
得分: 1
这是因为在主程序循环运行时,objects.JumpTable
被垃圾回收了。当引用被移除时,映射jump_table
被取消固定。解决方法是在调用LoadAndAssign()
之后使用defer objects.JumpTable.Close()
。或者添加以下代码:
func (o *bpfObjects) Close() {
o.MainProg.Close()
o.TailProg.Close()
o.JumpTable.Close()
o.FlowRingBuf.Close()
}
并在调用LoadAndAssign()
之后使用defer objects.Close()
。这在C版本中不会发生,因为没有垃圾回收机制。
英文:
This is happening because objects.JumpTable
is getting garbage collected while the main program loop is running. When the reference is removed, map jump_table
is unpinned. The solution is to do defer objects.JumpTable.Close()
after calling LoadAndAssign()
. Or add this code
func (o *bpfObjects) Close() {
o.MainProg.Close()
o.TailProg.Close()
o.JumpTable.Close()
o.FlowRingBuf.Close()
}
and call defer objects.Close()
after calling LoadAndAssign()
. This does not happen in the C version because there is no garbage collection.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论