bpf: Add BPF ringbuf and perf buffer benchmarks

author Andrii Nakryiko <andriin@fb.com>

Fri, 29 May 2020 07:54:23 +0000 (00:54 -0700)

committer Alexei Starovoitov <ast@kernel.org>

Mon, 1 Jun 2020 21:38:22 +0000 (14:38 -0700)
author Andrii Nakryiko <andriin@fb.com>
Fri, 29 May 2020 07:54:23 +0000 (00:54 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Mon, 1 Jun 2020 21:38:22 +0000 (14:38 -0700)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile

index e716e931d0c967d0feb5212e760b2fd35de03ebb..3ce548eff8a83732cb0377adac152c9446655833 100644 (file)
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -413,12 +413,15 @@ $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
         $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
  $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
  $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
+$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
+                           $(OUTPUT)/perfbuf_bench.skel.h
  $(OUTPUT)/bench.o: bench.h testing_helpers.h
  $(OUTPUT)/bench: LDLIBS += -lm
  $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
                  $(OUTPUT)/bench_count.o \
                  $(OUTPUT)/bench_rename.o \
-                $(OUTPUT)/bench_trigger.o
+                $(OUTPUT)/bench_trigger.o \
+                $(OUTPUT)/bench_ringbufs.o
         $(call msg,BINARY,,$@)
         $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
  
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c

index 14390689ef90839c4f2e04366ce04e76a23d8178..944ad4721c83c05ff6dbf3335f2d2e962a0330df 100644 (file)
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -130,6 +130,13 @@ static const struct argp_option opts[] = {
         {},
  };
  
+extern const struct argp bench_ringbufs_argp; /* const, matching its definition in benchs/bench_ringbufs.c */
+
+static const struct argp_child bench_parsers[] = {
+       { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+       {},
+};
+
  static error_t parse_arg(int key, char *arg, struct argp_state *state)
  {
         static int pos_args;
@@ -208,6 +215,7 @@ static void parse_cmdline_args(int argc, char **argv)
                 .options = opts,
                 .parser = parse_arg,
                 .doc = argp_program_doc,
+               .children = bench_parsers,
         };
         if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
                 exit(1);
@@ -310,6 +318,10 @@ extern const struct bench bench_trig_rawtp;
  extern const struct bench bench_trig_kprobe;
  extern const struct bench bench_trig_fentry;
  extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_rb_libbpf;
+extern const struct bench bench_rb_custom;
+extern const struct bench bench_pb_libbpf;
+extern const struct bench bench_pb_custom;
  
  static const struct bench *benchs[] = {
         &bench_count_global,
@@ -327,6 +339,10 @@ static const struct bench *benchs[] = {
         &bench_trig_kprobe,
         &bench_trig_fentry,
         &bench_trig_fmodret,
+       &bench_rb_libbpf,
+       &bench_rb_custom,
+       &bench_pb_libbpf,
+       &bench_pb_custom,
  };
  
  static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c

new file mode 100644 (file)

index 0000000..da87c7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <argp.h>
+#include <stdlib.h>
+#include "bench.h"
+#include "ringbuf_bench.skel.h"
+#include "perfbuf_bench.skel.h"
+
+static struct {
+       bool back2back;
+       int batch_cnt;
+       bool sampled;
+       int sample_rate;
+       int ringbuf_sz; /* per-ringbuf, in bytes */
+       bool ringbuf_use_output; /* use slower output API */
+       int perfbuf_sz; /* per-CPU size, in pages */
+} args = {
+       .back2back = false,
+       .batch_cnt = 500,
+       .sampled = false,
+       .sample_rate = 500,
+       .ringbuf_sz = 512 * 1024,
+       .ringbuf_use_output = false,
+       .perfbuf_sz = 128,
+};
+
+enum {
+       ARG_RB_BACK2BACK = 2000,
+       ARG_RB_USE_OUTPUT = 2001,
+       ARG_RB_BATCH_CNT = 2002,
+       ARG_RB_SAMPLED = 2003,
+       ARG_RB_SAMPLE_RATE = 2004,
+};
+
+static const struct argp_option opts[] = {
+       { "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
+       { "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
+       { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
+       { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
+       { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+       {},
+};
+
+/*
+ * argp parser for the ring buffer benchmark options listed in opts[].
+ * Each recognized key updates the matching field of the file-scope args
+ * struct; numeric options are parsed as base-10 and a negative value is
+ * reported through argp_usage().
+ * Returns 0 for a handled key and ARGP_ERR_UNKNOWN for any other key.
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       switch (key) {
+       case ARG_RB_BACK2BACK:
+               args.back2back = true;
+               break;
+       case ARG_RB_USE_OUTPUT:
+               args.ringbuf_use_output = true;
+               break;
+       case ARG_RB_BATCH_CNT:
+               args.batch_cnt = strtol(arg, NULL, 10);
+               if (args.batch_cnt < 0) {
+                       /* trailing newline keeps the usage text off this line */
+                       fprintf(stderr, "Invalid batch count.\n");
+                       argp_usage(state);
+               }
+               break;
+       case ARG_RB_SAMPLED:
+               args.sampled = true;
+               break;
+       case ARG_RB_SAMPLE_RATE:
+               args.sample_rate = strtol(arg, NULL, 10);
+               if (args.sample_rate < 0) {
+                       /* trailing newline keeps the usage text off this line */
+                       fprintf(stderr, "Invalid perfbuf sample rate.\n");
+                       argp_usage(state);
+               }
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+       return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_ringbufs_argp = {
+       .options = opts,
+       .parser = parse_arg,
+};
+
+/* RINGBUF-LIBBPF benchmark */
+
+static struct counter buf_hits;
+
+static inline void bufs_trigger_batch()
+{
+       (void)syscall(__NR_getpgid);
+}
+
+/*
+ * Shared .validate callback of the rb-libbpf, rb-custom, pb-libbpf and
+ * pb-custom benchmarks. Exits with status 1 unless exactly one consumer is
+ * configured, or when back-to-back mode is combined with more than one
+ * producer.
+ */
+static void bufs_validate()
+{
+       /* the check rejects zero consumers as well as several of them */
+       if (env.consumer_cnt != 1) {
+               fprintf(stderr, "ring buffer benchmarks need exactly one consumer!\n");
+               exit(1);
+       }
+
+       if (args.back2back && env.producer_cnt > 1) {
+               fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
+               exit(1);
+       }
+}
+
+static void *bufs_sample_producer(void *input)
+{
+       if (args.back2back) {
+               /* initial batch to get everything started */
+               bufs_trigger_batch();
+               return NULL;
+       }
+
+       while (true)
+               bufs_trigger_batch();
+       return NULL;
+}
+
+static struct ringbuf_libbpf_ctx {
+       struct ringbuf_bench *skel;
+       struct ring_buffer *ringbuf;
+} ringbuf_libbpf_ctx;
+
+static void ringbuf_libbpf_measure(struct bench_res *res)
+{
+       struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+       res->hits = atomic_swap(&buf_hits.value, 0);
+       res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct ringbuf_bench *ringbuf_setup_skeleton()
+{
+       struct ringbuf_bench *skel;
+
+       setup_libbpf();
+
+       skel = ringbuf_bench__open();
+       if (!skel) {
+               fprintf(stderr, "failed to open skeleton\n");
+               exit(1);
+       }
+
+       skel->rodata->batch_cnt = args.batch_cnt;
+       skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+
+       if (args.sampled)
+               /* record data + header take 16 bytes */
+               skel->rodata->wakeup_data_size = args.sample_rate * 16;
+
+       bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+
+       if (ringbuf_bench__load(skel)) {
+               fprintf(stderr, "failed to load skeleton\n");
+               exit(1);
+       }
+
+       return skel;
+}
+
+static int buf_process_sample(void *ctx, void *data, size_t len)
+{
+       atomic_inc(&buf_hits.value);
+       return 0;
+}
+
+static void ringbuf_libbpf_setup()
+{
+       struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+       struct bpf_link *link;
+
+       ctx->skel = ringbuf_setup_skeleton();
+       ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
+                                       buf_process_sample, NULL, NULL);
+       if (!ctx->ringbuf) {
+               fprintf(stderr, "failed to create ringbuf\n");
+               exit(1);
+       }
+
+       link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+       if (IS_ERR(link)) {
+               fprintf(stderr, "failed to attach program!\n");
+               exit(1);
+       }
+}
+
+static void *ringbuf_libbpf_consumer(void *input)
+{
+       struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+       while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
+               if (args.back2back)
+                       bufs_trigger_batch();
+       }
+       fprintf(stderr, "ringbuf polling failed!\n");
+       return NULL;
+}
+
+/* RINGBUF-CUSTOM benchmark */
+struct ringbuf_custom {
+       __u64 *consumer_pos;
+       __u64 *producer_pos;
+       __u64 mask;
+       void *data;
+       int map_fd;
+};
+
+static struct ringbuf_custom_ctx {
+       struct ringbuf_bench *skel;
+       struct ringbuf_custom ringbuf;
+       int epoll_fd;
+       struct epoll_event event;
+} ringbuf_custom_ctx;
+
+static void ringbuf_custom_measure(struct bench_res *res)
+{
+       struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+
+       res->hits = atomic_swap(&buf_hits.value, 0);
+       res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_custom_setup()
+{
+       struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+       const size_t page_size = getpagesize();
+       struct bpf_link *link;
+       struct ringbuf_custom *r;
+       void *tmp;
+       int err;
+
+       ctx->skel = ringbuf_setup_skeleton();
+
+       ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+       if (ctx->epoll_fd < 0) {
+               fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
+               exit(1);
+       }
+
+       r = &ctx->ringbuf;
+       r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+       r->mask = args.ringbuf_sz - 1;
+
+       /* Map writable consumer page */
+       tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                  r->map_fd, 0);
+       if (tmp == MAP_FAILED) {
+               fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
+               exit(1);
+       }
+       r->consumer_pos = tmp;
+
+       /* Map read-only producer page and data pages. */
+       tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
+                  r->map_fd, page_size);
+       if (tmp == MAP_FAILED) {
+               fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
+               exit(1);
+       }
+       r->producer_pos = tmp;
+       r->data = tmp + page_size;
+
+       ctx->event.events = EPOLLIN;
+       err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
+       if (err < 0) {
+               fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
+               exit(1);
+       }
+
+       link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+       if (IS_ERR(link)) {
+               fprintf(stderr, "failed to attach program\n");
+               exit(1);
+       }
+}
+
+/* flag bits of a record's length word; 1U keeps the shift into bit 31 well-defined */
+#define RINGBUF_BUSY_BIT (1U << 31)
+#define RINGBUF_DISCARD_BIT (1U << 30)
+#define RINGBUF_META_LEN 8
+
+/*
+ * Given a record's raw length word, return the number of bytes to advance
+ * past it: the length with its top two bits cleared, plus the
+ * RINGBUF_META_LEN-byte prefix, rounded up to a multiple of 8.
+ */
+static inline int roundup_len(__u32 len)
+{
+       /* clear out top 2 bits */
+       len <<= 2;
+       len >>= 2;
+       /* add length prefix */
+       len += RINGBUF_META_LEN;
+       /* round up to 8 byte alignment */
+       return (len + 7) / 8 * 8;
+}
+
+/*
+ * Walk all records between the consumer and producer positions and count
+ * each one in buf_hits. The consumer position is read once up front,
+ * advanced locally by roundup_len() per record, and published to
+ * r->consumer_pos with a store-release. The outer loop re-reads the
+ * producer position until a pass finds no new data.
+ */
+static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
+{
+       unsigned long cons_pos, prod_pos;
+       int *len_ptr, len;
+       bool got_new_data;
+
+       cons_pos = smp_load_acquire(r->consumer_pos);
+       while (true) {
+               got_new_data = false;
+               prod_pos = smp_load_acquire(r->producer_pos);
+               while (cons_pos < prod_pos) {
+                       len_ptr = r->data + (cons_pos & r->mask);
+                       len = smp_load_acquire(len_ptr);
+
+                       /*
+                        * sample not committed yet, bail out for now; publish
+                        * what this pass already consumed first, otherwise the
+                        * next call restarts from the stale position and
+                        * counts those records a second time
+                        */
+                       if (len & RINGBUF_BUSY_BIT) {
+                               if (got_new_data)
+                                       smp_store_release(r->consumer_pos, cons_pos);
+                               return;
+                       }
+
+                       got_new_data = true;
+                       cons_pos += roundup_len(len);
+
+                       atomic_inc(&buf_hits.value);
+               }
+               if (got_new_data)
+                       smp_store_release(r->consumer_pos, cons_pos);
+               else
+                       break;
+       }
+}
+
+/*
+ * Consumer thread of the rb-custom benchmark: waits on the epoll fd and
+ * processes the ring after every wakeup that reports an event. In
+ * back-to-back mode it triggers a new batch before each wait. The loop only
+ * ends once epoll_wait() returns a negative value.
+ */
+static void *ringbuf_custom_consumer(void *input)
+{
+       struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+       int cnt;
+
+       do {
+               if (args.back2back)
+                       bufs_trigger_batch();
+               cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
+               if (cnt > 0)
+                       ringbuf_custom_process_ring(&ctx->ringbuf);
+       } while (cnt >= 0);
+       fprintf(stderr, "ringbuf polling failed!\n");
+       /* pointer return type: NULL, like the other consumer threads */
+       return NULL;
+}
+
+/* PERFBUF-LIBBPF benchmark */
+static struct perfbuf_libbpf_ctx {
+       struct perfbuf_bench *skel;
+       struct perf_buffer *perfbuf;
+} perfbuf_libbpf_ctx;
+
+static void perfbuf_measure(struct bench_res *res)
+{
+       struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+       res->hits = atomic_swap(&buf_hits.value, 0);
+       res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct perfbuf_bench *perfbuf_setup_skeleton()
+{
+       struct perfbuf_bench *skel;
+
+       setup_libbpf();
+
+       skel = perfbuf_bench__open();
+       if (!skel) {
+               fprintf(stderr, "failed to open skeleton\n");
+               exit(1);
+       }
+
+       skel->rodata->batch_cnt = args.batch_cnt;
+
+       if (perfbuf_bench__load(skel)) {
+               fprintf(stderr, "failed to load skeleton\n");
+               exit(1);
+       }
+
+       return skel;
+}
+
+static enum bpf_perf_event_ret
+perfbuf_process_sample_raw(void *input_ctx, int cpu,
+                          struct perf_event_header *e)
+{
+       switch (e->type) {
+       case PERF_RECORD_SAMPLE:
+               atomic_inc(&buf_hits.value);
+               break;
+       case PERF_RECORD_LOST:
+               break;
+       default:
+               return LIBBPF_PERF_EVENT_ERROR;
+       }
+       return LIBBPF_PERF_EVENT_CONT;
+}
+
+/*
+ * Shared .setup callback of the pb-libbpf and pb-custom benchmarks: loads
+ * the perfbuf skeleton, fills in the perf event attributes (notifying on
+ * every sample, or on every args.sample_rate-th sample when sampling is
+ * enabled), creates the raw perf buffer and attaches the BPF program.
+ * Exits with status 1 on any failure.
+ */
+static void perfbuf_libbpf_setup()
+{
+       struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+       struct perf_event_attr attr;
+       struct perf_buffer_raw_opts pb_opts = {
+               .event_cb = perfbuf_process_sample_raw,
+               .ctx = (void *)(long)0,
+               .attr = &attr,
+       };
+       struct bpf_link *link;
+
+       ctx->skel = perfbuf_setup_skeleton();
+
+       memset(&attr, 0, sizeof(attr));
+       attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+       attr.type = PERF_TYPE_SOFTWARE;
+       attr.sample_type = PERF_SAMPLE_RAW;
+       /* notify only every Nth sample */
+       if (args.sampled) {
+               attr.sample_period = args.sample_rate;
+               attr.wakeup_events = args.sample_rate;
+       } else {
+               attr.sample_period = 1;
+               attr.wakeup_events = 1;
+       }
+
+       if (args.sample_rate > args.batch_cnt) {
+               fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
+                       args.sample_rate, args.batch_cnt);
+               exit(1);
+       }
+
+       ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
+                                           args.perfbuf_sz, &pb_opts);
+       if (!ctx->perfbuf) {
+               fprintf(stderr, "failed to create perfbuf\n");
+               exit(1);
+       }
+
+       link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
+       if (IS_ERR(link)) {
+               fprintf(stderr, "failed to attach program\n");
+               exit(1);
+       }
+}
+
+static void *perfbuf_libbpf_consumer(void *input)
+{
+       struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+       while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
+               if (args.back2back)
+                       bufs_trigger_batch();
+       }
+       fprintf(stderr, "perfbuf polling failed!\n");
+       return NULL;
+}
+
+/* PERFBUF-CUSTOM benchmark */
+
+/* copies of internal libbpf definitions */
+struct perf_cpu_buf {
+       struct perf_buffer *pb;
+       void *base; /* mmap()'ed memory */
+       void *buf; /* for reconstructing segmented data */
+       size_t buf_size;
+       int fd;
+       int cpu;
+       int map_key;
+};
+
+struct perf_buffer {
+       perf_buffer_event_fn event_cb;
+       perf_buffer_sample_fn sample_cb;
+       perf_buffer_lost_fn lost_cb;
+       void *ctx; /* passed into callbacks */
+
+       size_t page_size;
+       size_t mmap_size;
+       struct perf_cpu_buf **cpu_bufs;
+       struct epoll_event *events;
+       int cpu_cnt; /* number of allocated CPU buffers */
+       int epoll_fd; /* perf event FD */
+       int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void *perfbuf_custom_consumer(void *input)
+{
+       struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+       struct perf_buffer *pb = ctx->perfbuf;
+       struct perf_cpu_buf *cpu_buf;
+       struct perf_event_mmap_page *header;
+       size_t mmap_mask = pb->mmap_size - 1;
+       struct perf_event_header *ehdr;
+       __u64 data_head, data_tail;
+       size_t ehdr_size;
+       void *base;
+       int i, cnt;
+
+       while (true) {
+               if (args.back2back)
+                       bufs_trigger_batch();
+               cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
+               if (cnt <= 0) {
+                       fprintf(stderr, "perf epoll failed: %d\n", -errno);
+                       exit(1);
+               }
+
+               for (i = 0; i < cnt; ++i) {
+                       cpu_buf = pb->events[i].data.ptr;
+                       header = cpu_buf->base;
+                       base = ((void *)header) + pb->page_size;
+
+                       data_head = ring_buffer_read_head(header);
+                       data_tail = header->data_tail;
+                       while (data_head != data_tail) {
+                               ehdr = base + (data_tail & mmap_mask);
+                               ehdr_size = ehdr->size;
+
+                               if (ehdr->type == PERF_RECORD_SAMPLE)
+                                       atomic_inc(&buf_hits.value);
+
+                               data_tail += ehdr_size;
+                       }
+                       ring_buffer_write_tail(header, data_tail);
+               }
+       }
+       return NULL;
+}
+
+const struct bench bench_rb_libbpf = {
+       .name = "rb-libbpf",
+       .validate = bufs_validate,
+       .setup = ringbuf_libbpf_setup,
+       .producer_thread = bufs_sample_producer,
+       .consumer_thread = ringbuf_libbpf_consumer,
+       .measure = ringbuf_libbpf_measure,
+       .report_progress = hits_drops_report_progress,
+       .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rb_custom = {
+       .name = "rb-custom",
+       .validate = bufs_validate,
+       .setup = ringbuf_custom_setup,
+       .producer_thread = bufs_sample_producer,
+       .consumer_thread = ringbuf_custom_consumer,
+       .measure = ringbuf_custom_measure,
+       .report_progress = hits_drops_report_progress,
+       .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_libbpf = {
+       .name = "pb-libbpf",
+       .validate = bufs_validate,
+       .setup = perfbuf_libbpf_setup,
+       .producer_thread = bufs_sample_producer,
+       .consumer_thread = perfbuf_libbpf_consumer,
+       .measure = perfbuf_measure,
+       .report_progress = hits_drops_report_progress,
+       .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_custom = {
+       .name = "pb-custom",
+       .validate = bufs_validate,
+       .setup = perfbuf_libbpf_setup,
+       .producer_thread = bufs_sample_producer,
+       .consumer_thread = perfbuf_custom_consumer,
+       .measure = perfbuf_measure,
+       .report_progress = hits_drops_report_progress,
+       .report_final = hits_drops_report_final,
+};
+
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh

new file mode 100755 (executable)

index 0000000..af4aa04
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function hits()
+{
+       echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+       echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function header()
+{
+       local len=${#1}
+
+       printf "\n%s\n" "$1"
+       for i in $(seq 1 $len); do printf '='; done
+       printf '\n'
+}
+
+function summarize()
+{
+       # $1: label for the output row; $2: complete output of one bench run
+       local bench="$1"
+       local summary
+
+       # quote $2 so its line breaks survive and tail really picks the last line
+       summary=$(echo "$2" | tail -n1)
+       printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+header "Single-producer, parallel producer"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+       summarize $b "$($RUN_BENCH $b)"
+done
+
+header "Single-producer, parallel producer, sampled notification"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+       summarize $b "$($RUN_BENCH --rb-sampled $b)"
+done
+
+header "Single-producer, back-to-back mode"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+       summarize $b "$($RUN_BENCH --rb-b2b $b)"
+       summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+done
+
+header "Ringbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+       summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+done
+header "Perfbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+       summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+done
+
+header "Ringbuf back-to-back, reserve+commit vs output"
+summarize "reserve" "$($RUN_BENCH --rb-b2b                 rb-custom)"
+summarize "output"  "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+
+header "Ringbuf sampled, reserve+commit vs output"
+summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled                 rb-custom)"
+summarize "output-sampled"  "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+
+header "Single-producer, consumer/producer competing on the same CPU, low batch count"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+       summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+done
+
+header "Ringbuf, multi-producer contention"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+       summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+done
+
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c

new file mode 100644 (file)

index 0000000..e5ab483
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(value_size, sizeof(int));
+       __uint(key_size, sizeof(int));
+} perfbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+/*
+ * Attached to fentry of __x64_sys_getpgid: calls bpf_perf_event_output()
+ * batch_cnt times with sample_val as payload (BPF_F_CURRENT_CPU) and counts
+ * every call that returns non-zero in dropped.
+ */
+SEC("fentry/__x64_sys_getpgid")
+int bench_perfbuf(void *ctx)
+{
+       int i;
+
+       for (i = 0; i < batch_cnt; i++) {
+               if (bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU,
+                                         &sample_val, sizeof(sample_val)))
+                       __sync_add_and_fetch(&dropped, 1);
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c

new file mode 100644 (file)

index 0000000..123607d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+       __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+const volatile long use_output = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+const volatile long wakeup_data_size = 0;
+
+static __always_inline long get_flags()
+{
+       long sz;
+
+       if (!wakeup_data_size)
+               return 0;
+
+       sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+       return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_ringbuf(void *ctx)
+{
+       long *sample, flags;
+       int i;
+
+       if (!use_output) {
+               for (i = 0; i < batch_cnt; i++) {
+                       sample = bpf_ringbuf_reserve(&ringbuf,
+                                                    sizeof(sample_val), 0);
+                       if (!sample) {
+                               __sync_add_and_fetch(&dropped, 1);
+                       } else {
+                               *sample = sample_val;
+                               flags = get_flags();
+                               bpf_ringbuf_submit(sample, flags);
+                       }
+               }
+       } else {
+               for (i = 0; i < batch_cnt; i++) {
+                       flags = get_flags();
+                       if (bpf_ringbuf_output(&ringbuf, &sample_val,
+                                              sizeof(sample_val), flags))
+                               __sync_add_and_fetch(&dropped, 1);
+               }
+       }
+       return 0;
+}
author	Andrii Nakryiko <andriin@fb.com>
	Fri, 29 May 2020 07:54:23 +0000 (00:54 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
	Mon, 1 Jun 2020 21:38:22 +0000 (14:38 -0700)
tools/testing/selftests/bpf/Makefile		patch \| blob \| history
tools/testing/selftests/bpf/bench.c		patch \| blob \| history
tools/testing/selftests/bpf/benchs/bench_ringbufs.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh	[new file with mode: 0755]	patch \| blob
tools/testing/selftests/bpf/progs/perfbuf_bench.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/progs/ringbuf_bench.c	[new file with mode: 0644]	patch \| blob