case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_trace_printk:
- if (capable(CAP_SYS_ADMIN))
+ if (perfmon_capable())
return bpf_get_trace_printk_proto();
/* fall through */
default:
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
+#include <linux/capability.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
struct bpf_map_memory memory;
char name[BPF_OBJ_NAME_LEN];
u32 btf_vmlinux_value_type_id;
- bool unpriv_array;
+ bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
/* 22 bytes hole */
extern int sysctl_unprivileged_bpf_disabled;
+static inline bool bpf_allow_ptr_leaks(void)
+{
+ return perfmon_capable();
+}
+
+static inline bool bpf_bypass_spec_v1(void)
+{
+ return perfmon_capable();
+}
+
+static inline bool bpf_bypass_spec_v4(void)
+{
+ return perfmon_capable();
+}
+
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
+ bool bpf_capable;
+ bool bypass_spec_v1;
+ bool bypass_spec_v4;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
- bool unpriv = !capable(CAP_SYS_ADMIN);
+ bool bypass_spec_v1 = bpf_bypass_spec_v1();
u64 cost, array_size, mask64;
struct bpf_map_memory mem;
struct bpf_array *array;
mask64 -= 1;
index_mask = mask64;
- if (unpriv) {
+ if (!bypass_spec_v1) {
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
return ERR_PTR(-ENOMEM);
}
array->index_mask = index_mask;
- array->map.unpriv_array = unpriv;
+ array->map.bypass_spec_v1 = bypass_spec_v1;
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
- if (map->unpriv_array) {
+ if (!map->bypass_spec_v1) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
- if (map->unpriv_array) {
+ if (!map->bypass_spec_v1) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
struct bpf_map *map;
int err;
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return ERR_PTR(-EPERM);
st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
if (!bpf_prog_kallsyms_candidate(fp) ||
- !capable(CAP_SYS_ADMIN))
+ !bpf_capable())
return;
bpf_prog_ksym_set_addr(fp);
u64 cost;
int ret;
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return ERR_PTR(-EPERM);
/* check sanity of attributes */
BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
offsetof(struct htab_elem, hash_node.pprev));
- if (lru && !capable(CAP_SYS_ADMIN))
+ if (lru && !bpf_capable())
/* LRU implementation is much complicated than other
- * maps. Hence, limit to CAP_SYS_ADMIN for now.
+ * maps. Hence, limit to CAP_BPF.
*/
return -EPERM;
break;
}
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return NULL;
switch (func_id) {
case BPF_FUNC_spin_unlock:
return &bpf_spin_unlock_proto;
case BPF_FUNC_trace_printk:
+ if (!perfmon_capable())
+ return NULL;
return bpf_get_trace_printk_proto();
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
u64 cost = sizeof(*trie), cost_per_node;
int ret;
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return ERR_PTR(-EPERM);
/* check sanity of attributes */
/* Misc members not needed in bpf_map_meta_equal() check. */
inner_map_meta->ops = inner_map->ops;
if (inner_map->ops == &array_map_ops) {
- inner_map_meta->unpriv_array = inner_map->unpriv_array;
+ inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
container_of(inner_map_meta, struct bpf_array, map)->index_mask =
container_of(inner_map, struct bpf_array, map)->index_mask;
}
/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return -EPERM;
/* check sanity of attributes */
struct bpf_map_memory mem;
u64 array_size;
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return ERR_PTR(-EPERM);
array_size = sizeof(*array);
u64 cost, n_buckets;
int err;
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return ERR_PTR(-EPERM);
if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
err = -EBUSY;
goto err_put;
}
- if (!capable(CAP_SYS_ADMIN)) {
+ if (!bpf_capable()) {
err = -EPERM;
goto err_put;
}
}
}
+static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
+{
+ switch (prog_type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_XDP:
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+ case BPF_PROG_TYPE_SK_SKB:
+ case BPF_PROG_TYPE_SK_MSG:
+ case BPF_PROG_TYPE_LIRC_MODE2:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_EXT: /* extends any prog */
+ return true;
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ /* always unpriv */
+ case BPF_PROG_TYPE_SK_REUSEPORT:
+ /* equivalent to SOCKET_FILTER. need CAP_BPF only */
+ default:
+ return false;
+ }
+}
+
+static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
+{
+ switch (prog_type) {
+ case BPF_PROG_TYPE_KPROBE:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
+ case BPF_PROG_TYPE_EXT: /* extends any prog */
+ return true;
+ default:
+ return false;
+ }
+}
+
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
(attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
- !capable(CAP_SYS_ADMIN))
+ !bpf_capable())
return -EPERM;
/* copy eBPF program license from user space */
is_gpl = license_is_gpl_compatible(license);
if (attr->insn_cnt == 0 ||
- attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
+ attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
return -E2BIG;
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
- !capable(CAP_SYS_ADMIN))
+ !bpf_capable())
+ return -EPERM;
+
+ if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (is_perfmon_prog_type(type) && !perfmon_capable())
return -EPERM;
bpf_prog_load_fixup_attach_type(attr);
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
+ if (!capable(CAP_NET_ADMIN))
+ /* cg-skb progs can be loaded by unpriv user.
+ * check permissions at attach time.
+ */
+ return -EPERM;
return prog->enforce_expected_attach_type &&
prog->expected_attach_type != attach_type ?
-EINVAL : 0;
struct bpf_prog *prog;
int ret;
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;
{
enum bpf_prog_type ptype;
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
if (CHECK_ATTR(BPF_PROG_DETACH))
return -EINVAL;
case BPF_PROG_TYPE_LIRC_MODE2:
return lirc_prog_detach(attr);
case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
return skb_flow_dissector_bpf_prog_detach(attr);
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SKB:
struct bpf_prog *prog;
int ret = -ENOTSUPP;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
return -EINVAL;
info.run_time_ns = stats.nsecs;
info.run_cnt = stats.cnt;
- if (!capable(CAP_SYS_ADMIN)) {
+ if (!bpf_capable()) {
info.jited_prog_len = 0;
info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
if (CHECK_ATTR(BPF_BTF_LOAD))
return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return -EPERM;
return btf_new_fd(attr);
struct bpf_prog *prog;
int ret;
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
if (CHECK_ATTR(BPF_LINK_CREATE))
return -EINVAL;
u32 flags;
int ret;
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
if (CHECK_ATTR(BPF_LINK_UPDATE))
return -EINVAL;
union bpf_attr attr;
int err;
- if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
+ if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
return -EPERM;
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
reg->type = SCALAR_VALUE;
reg->var_off = tnum_unknown;
reg->frameno = 0;
- reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks;
+ reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
__mark_reg_unbounded(reg);
}
continue;
if (insn[i].src_reg != BPF_PSEUDO_CALL)
continue;
- if (!env->allow_ptr_leaks) {
- verbose(env, "function calls to other bpf functions are allowed for root only\n");
+ if (!env->bpf_capable) {
+ verbose(env,
+ "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
return -EPERM;
}
ret = add_subprog(env, i + insn[i].imm + 1);
bool new_marks = false;
int i, err;
- if (!env->allow_ptr_leaks)
- /* backtracking is root only for now */
+ if (!env->bpf_capable)
return 0;
func = st->frame[st->curframe];
reg = &cur->regs[value_regno];
if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
- !register_is_null(reg) && env->allow_ptr_leaks) {
+ !register_is_null(reg) && env->bpf_capable) {
if (dst_reg != BPF_REG_FP) {
/* The backtracking logic can only recognize explicit
* stack slot address like [fp - 8]. Other spill of
return -EINVAL;
}
- if (!env->allow_ptr_leaks) {
+ if (!env->bypass_spec_v4) {
bool sanitize = false;
if (state->stack[spi].slot_type[0] == STACK_SPILL &&
* Spectre masking for stack ALU.
* See also retrieve_ptr_limit().
*/
- if (!env->allow_ptr_leaks) {
+ if (!env->bypass_spec_v1) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
if (!BPF_MAP_PTR(aux->map_ptr_state))
bpf_map_ptr_store(aux, meta->map_ptr,
- meta->map_ptr->unpriv_array);
+ !meta->map_ptr->bypass_spec_v1);
else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
- meta->map_ptr->unpriv_array);
+ !meta->map_ptr->bypass_spec_v1);
return 0;
}
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
const struct bpf_insn *insn)
{
- return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+ return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
}
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
/* For unprivileged we require that resulting offset must be in bounds
* in order to be able to sanitize access later on.
*/
- if (!env->allow_ptr_leaks) {
+ if (!env->bypass_spec_v1) {
if (dst_reg->type == PTR_TO_MAP_VALUE &&
check_map_access(env, dst, dst_reg->off, 1, false)) {
verbose(env, "R%d pointer arithmetic of map value goes out of range, "
insn_stack[env->cfg.cur_stack++] = w;
return 1;
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
- if (loop_ok && env->allow_ptr_leaks)
+ if (loop_ok && env->bpf_capable)
return 0;
verbose_linfo(env, t, "%d: ", t);
verbose_linfo(env, w, "%d: ", w);
if (env->max_states_per_insn < states_cnt)
env->max_states_per_insn = states_cnt;
- if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
+ if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
return push_jmp_history(env, cur);
if (!add_new_state)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
- if (env->allow_ptr_leaks && !expect_blinding &&
+ if (env->bpf_capable && !expect_blinding &&
prog->jit_requested &&
!bpf_map_key_poisoned(aux) &&
!bpf_map_ptr_poisoned(aux) &&
env->insn_aux_data[i].orig_idx = i;
env->prog = *prog;
env->ops = bpf_verifier_ops[env->prog->type];
- is_priv = capable(CAP_SYS_ADMIN);
+ is_priv = bpf_capable();
if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
mutex_lock(&bpf_verifier_lock);
if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
env->strict_alignment = false;
- env->allow_ptr_leaks = is_priv;
+ env->allow_ptr_leaks = bpf_allow_ptr_leaks();
+ env->bypass_spec_v1 = bpf_bypass_spec_v1();
+ env->bypass_spec_v4 = bpf_bypass_spec_v4();
+ env->bpf_capable = bpf_capable();
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
+ if (!capable(CAP_SYS_ADMIN))
+ return NULL;
+
pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
current->comm, task_pid_nr(current));
!attr->btf_key_type_id || !attr->btf_value_type_id)
return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return -EPERM;
if (attr->value_size > MAX_VALUE_SIZE)
/* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as
* the map_alloc_check() side also does.
*/
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return ERR_PTR(-EPERM);
nla_for_each_nested(nla, nla_stgs, rem) {
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return false;
break;
}
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
case bpf_ctx_range(struct __sk_buff, tstamp):
- if (!capable(CAP_SYS_ADMIN))
+ if (!bpf_capable())
return false;
break;
default: