
Commit 7b9e74c

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:

 - Fix register equivalence for pointers to packet (Alexei Starovoitov)

 - Fix incorrect pruning due to atomic fetch precision tracking (Daniel Borkmann)

 - Fix grace period wait for bpf_link-ed tracepoints (Kumar Kartikeya Dwivedi)

 - Fix use-after-free of sockmap's sk->sk_socket (Kuniyuki Iwashima)

 - Reject direct access to nullable PTR_TO_BUF pointers (Qi Tang)

 - Reject sleepable kprobe_multi programs at attach time (Varun R Mallya)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Add more precision tracking tests for atomics
  bpf: Fix incorrect pruning due to atomic fetch precision tracking
  bpf: Reject sleepable kprobe_multi programs at attach time
  bpf: reject direct access to nullable PTR_TO_BUF pointers
  bpf: sockmap: Fix use-after-free of sk->sk_socket in sk_psock_verdict_data_ready().
  bpf: Fix grace period wait for tracepoint bpf_link
  bpf: Fix regsafe() for pointers to packet
2 parents 5a9617d + e1b5687 commit 7b9e74c

7 files changed

Lines changed: 432 additions & 12 deletions


include/linux/bpf.h

Lines changed: 4 additions & 0 deletions
@@ -1854,6 +1854,10 @@ struct bpf_link_ops {
 	 * target hook is sleepable, we'll go through tasks trace RCU GP and
 	 * then "classic" RCU GP; this need for chaining tasks trace and
 	 * classic RCU GPs is designated by setting bpf_link->sleepable flag
+	 *
+	 * For non-sleepable tracepoint links we go through SRCU gp instead,
+	 * since RCU is not used in that case. Sleepable tracepoints still
+	 * follow the scheme above.
 	 */
 	void (*dealloc_deferred)(struct bpf_link *link);
 	int (*detach)(struct bpf_link *link);

include/linux/tracepoint.h

Lines changed: 20 additions & 0 deletions
@@ -122,13 +122,33 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
 	return tp->ext && tp->ext->faultable;
 }
+/*
+ * Run RCU callback with the appropriate grace period wait for non-faultable
+ * tracepoints, e.g., those used in atomic context.
+ */
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{
+	call_srcu(&tracepoint_srcu, rcu, func);
+}
+/*
+ * Run RCU callback with the appropriate grace period wait for faultable
+ * tracepoints, e.g., those used in syscall context.
+ */
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{
+	call_rcu_tasks_trace(rcu, func);
+}
 #else
 static inline void tracepoint_synchronize_unregister(void)
 { }
 static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
 	return false;
 }
+static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
+{ }
+static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
+{ }
 #endif
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
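Editor's note: a minimal caller sketch, assuming only what the hunk above defines. The function and parameter names below are made up for illustration and are not part of this commit; it simply shows how an unregister path would pick between the two helpers based on whether the tracepoint is faultable.

#include <linux/tracepoint.h>

static void example_defer_probe_free(struct tracepoint *tp, struct rcu_head *rcu,
				     rcu_callback_t free_cb)
{
	if (tracepoint_is_faultable(tp))
		/* syscall/faultable tracepoints: RCU Tasks Trace grace period */
		call_tracepoint_unregister_syscall(rcu, free_cb);
	else
		/* atomic-context tracepoints: SRCU grace period (tracepoint_srcu) */
		call_tracepoint_unregister_atomic(rcu, free_cb);
}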

kernel/bpf/syscall.c

Lines changed: 23 additions & 2 deletions
@@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
 	bpf_link_dealloc(link);
 }
 
+static bool bpf_link_is_tracepoint(struct bpf_link *link)
+{
+	/*
+	 * Only these combinations support a tracepoint bpf_link.
+	 * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use
+	 * bpf_raw_tp_link_lops and thus dealloc_deferred(), see
+	 * bpf_raw_tp_link_attach().
+	 */
+	return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT ||
+	       (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
+}
+
 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
 {
 	if (rcu_trace_implies_rcu_gp())
@@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link)
 	if (link->prog)
 		ops->release(link);
 	if (ops->dealloc_deferred) {
-		/* Schedule BPF link deallocation, which will only then
+		/*
+		 * Schedule BPF link deallocation, which will only then
 		 * trigger putting BPF program refcount.
 		 * If underlying BPF program is sleepable or BPF link's target
 		 * attach hookpoint is sleepable or otherwise requires RCU GPs
 		 * to ensure link and its underlying BPF program is not
 		 * reachable anymore, we need to first wait for RCU tasks
-		 * trace sync, and then go through "classic" RCU grace period
+		 * trace sync, and then go through "classic" RCU grace period.
+		 *
+		 * For tracepoint BPF links, we need to go through SRCU grace
+		 * period wait instead when non-faultable tracepoint is used. We
+		 * don't need to chain SRCU grace period waits, however, for the
+		 * faultable case, since it exclusively uses RCU Tasks Trace.
 		 */
 		if (link->sleepable || (link->prog && link->prog->sleepable))
 			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+		/* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
+		else if (bpf_link_is_tracepoint(link))
+			call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
 		else
 			call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
 	} else if (ops->dealloc) {
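Editor's note: a hedged userspace sketch of the path that exercises this logic. It assumes the standard libbpf API and an already-loaded raw_tp program handle ('prog' and the tracepoint name are illustrative); destroying the link is what eventually reaches bpf_link_free(), where the SRCU grace period added above is now used for non-sleepable tracepoint links.

#include <errno.h>
#include <bpf/libbpf.h>

/* 'prog' is assumed to be a loaded raw_tracepoint BPF program. */
static int attach_then_detach(struct bpf_program *prog)
{
	struct bpf_link *link;

	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
	if (!link)
		return -errno;

	/* ... tracepoint fires, program runs under (S)RCU protection ... */

	/* Dropping the last reference defers the actual free past the
	 * grace period chosen in bpf_link_free().
	 */
	return bpf_link__destroy(link);
}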

kernel/bpf/verifier.c

Lines changed: 32 additions & 5 deletions
@@ -617,6 +617,13 @@ static bool is_atomic_load_insn(const struct bpf_insn *insn)
 	       insn->imm == BPF_LOAD_ACQ;
 }
 
+static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_STX &&
+	       BPF_MODE(insn->code) == BPF_ATOMIC &&
+	       (insn->imm & BPF_FETCH);
+}
+
 static int __get_spi(s32 off)
 {
 	return (-off - 1) / BPF_REG_SIZE;
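Editor's note: for context, the BPF_FETCH encodings this helper matches come from include/uapi/linux/bpf.h, reproduced here for reference:

#define BPF_FETCH	0x01	/* not an opcode on its own, used to build others */
#define BPF_XCHG	(0xe0 | BPF_FETCH)	/* atomic exchange */
#define BPF_CMPXCHG	(0xf0 | BPF_FETCH)	/* atomic compare-and-write */

BPF_ADD, BPF_AND, BPF_OR and BPF_XOR carry BPF_FETCH only when the program asks for the old value, while BPF_XCHG and BPF_CMPXCHG always include it, so is_atomic_fetch_insn() covers exactly the operations that write the old memory value back into a register.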
@@ -4447,10 +4454,24 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 			 * dreg still needs precision before this insn
 			 */
 		}
-	} else if (class == BPF_LDX || is_atomic_load_insn(insn)) {
-		if (!bt_is_reg_set(bt, dreg))
+	} else if (class == BPF_LDX ||
+		   is_atomic_load_insn(insn) ||
+		   is_atomic_fetch_insn(insn)) {
+		u32 load_reg = dreg;
+
+		/*
+		 * Atomic fetch operation writes the old value into
+		 * a register (sreg or r0) and if it was tracked for
+		 * precision, propagate to the stack slot like we do
+		 * in regular ldx.
+		 */
+		if (is_atomic_fetch_insn(insn))
+			load_reg = insn->imm == BPF_CMPXCHG ?
+				   BPF_REG_0 : sreg;
+
+		if (!bt_is_reg_set(bt, load_reg))
 			return 0;
-		bt_clear_reg(bt, dreg);
+		bt_clear_reg(bt, load_reg);
 
 		/* scalars can only be spilled into stack w/o losing precision.
 		 * Load from any other memory can be zero extended.
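Editor's note: a hedged BPF-C illustration of the pattern this fix is about (this is not the selftest added by the series; the section and variable names are made up). When the return value of __sync_fetch_and_add() is used, clang for the BPF target (with fetch atomics enabled, -mcpu=v3 if I recall correctly) emits a BPF_ATOMIC | BPF_FETCH instruction. The old memory value lands in the source register (r0 for a compare-exchange), so that register, not dreg, is what backtrack_insn() now treats like the destination of a load.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("socket")
int atomic_fetch_precision_example(void *ctx)
{
	__u64 slot = 42;	/* scalar spilled to a BPF stack slot */
	__u64 old;

	/* Emits an atomic fetch_add: memory becomes 49, 'old' receives the
	 * previous value (42) read back from the stack slot. If 'old' later
	 * needs precision, the verifier must propagate that to the stack
	 * slot, exactly as it does for a regular load.
	 */
	old = __sync_fetch_and_add(&slot, 7);

	return old == 42 ? 0 : 1;
}

char _license[] SEC("license") = "GPL";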
@@ -7905,7 +7926,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	} else if (reg->type == CONST_PTR_TO_MAP) {
 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
 					      value_regno);
-	} else if (base_type(reg->type) == PTR_TO_BUF) {
+	} else if (base_type(reg->type) == PTR_TO_BUF &&
+		   !type_may_be_null(reg->type)) {
 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
 		u32 *max_access;
 
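Editor's note: a hedged sketch of the rule this hunk enforces. The kfunc below is purely hypothetical (the commit does not name a specific source of nullable buffers); the point is that a pointer typed PTR_TO_BUF with PTR_MAYBE_NULL must be NULL-checked before any direct access, which this change now rejects otherwise.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Hypothetical kfunc, named for illustration only; assume the verifier
 * types its return value as PTR_TO_BUF | PTR_MAYBE_NULL.
 */
extern char *hypothetical_get_nullable_buf(void) __ksym;

SEC("socket")
int nullable_buf_example(void *ctx)
{
	char *buf = hypothetical_get_nullable_buf();

	/* A direct access such as "return buf[0];" right here is now
	 * rejected, because the pointer may still be NULL.
	 */
	if (!buf)
		return 0;

	return buf[0];	/* allowed: the NULL check strips PTR_MAYBE_NULL */
}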

@@ -19915,8 +19937,13 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 		 * since someone could have accessed through (ptr - k), or
 		 * even done ptr -= k in a register, to get a safe access.
 		 */
-		if (rold->range > rcur->range)
+		if (rold->range < 0 || rcur->range < 0) {
+			/* special case for [BEYOND|AT]_PKT_END */
+			if (rold->range != rcur->range)
+				return false;
+		} else if (rold->range > rcur->range) {
 			return false;
+		}
 		/* If the offsets don't match, we can't trust our alignment;
 		 * nor can we be sure that we won't fall out of range.
 		 */
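Editor's note: for context, the negative 'range' values are sentinel markers set when a packet pointer has been compared against pkt_end, not verified lengths, so a greater-than comparison between them says nothing about safety and only exact equality does. As far as I recall they are defined in kernel/bpf/verifier.c (not part of this diff, meanings paraphrased) as:

enum {
	BEYOND_PKT_END = -1,	/* pointer known to be past pkt_end */
	AT_PKT_END = -2,	/* pointer known to equal pkt_end */
};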

kernel/trace/bpf_trace.c

Lines changed: 4 additions & 0 deletions
@@ -2752,6 +2752,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 	if (!is_kprobe_multi(prog))
 		return -EINVAL;
 
+	/* kprobe_multi is not allowed to be sleepable. */
+	if (prog->sleepable)
+		return -EINVAL;
+
 	/* Writing to context is not allowed for kprobes. */
 	if (prog->aux->kprobe_write_ctx)
 		return -EINVAL;

net/core/skmsg.c

Lines changed: 8 additions & 5 deletions
@@ -1267,17 +1267,20 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
 
 static void sk_psock_verdict_data_ready(struct sock *sk)
 {
-	struct socket *sock = sk->sk_socket;
-	const struct proto_ops *ops;
+	const struct proto_ops *ops = NULL;
+	struct socket *sock;
 	int copied;
 
 	trace_sk_data_ready(sk);
 
-	if (unlikely(!sock))
-		return;
-	ops = READ_ONCE(sock->ops);
+	rcu_read_lock();
+	sock = READ_ONCE(sk->sk_socket);
+	if (likely(sock))
+		ops = READ_ONCE(sock->ops);
+	rcu_read_unlock();
 	if (!ops || !ops->read_skb)
 		return;
+
 	copied = ops->read_skb(sk, sk_psock_verdict_recv);
 	if (copied >= 0) {
 		struct sk_psock *psock;
