Skip to content

Commit 9df9578

Browse files
author
Florian Westphal
committed
netfilter: nft_set_pipapo: split gc into unlink and reclaim phase
Yiming Qian reports Use-after-free in the pipapo set type: Under a large number of expired elements, commit-time GC can run for a very long time in a non-preemptible context, triggering soft lockup warnings and RCU stall reports (local denial of service). We must split GC in an unlink and a reclaim phase. We cannot queue elements for freeing until pointers have been swapped. Expired elements are still exposed to both the packet path and userspace dumpers via the live copy of the data structure. call_rcu() does not protect us: dump operations or element lookups starting after call_rcu has fired can still observe the free'd element, unless the commit phase has made enough progress to swap the clone and live pointers before any new reader has picked up the old version. This a similar approach as done recently for the rbtree backend in commit 35f83a7 ("netfilter: nft_set_rbtree: don't gc elements on insert"). Fixes: 3c4287f ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Yiming Qian <[email protected]> Signed-off-by: Florian Westphal <[email protected]>
1 parent fb7fb40 commit 9df9578

4 files changed

Lines changed: 50 additions & 13 deletions

File tree

include/net/netfilter/nf_tables.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1863,6 +1863,11 @@ struct nft_trans_gc {
18631863
struct rcu_head rcu;
18641864
};
18651865

1866+
static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
1867+
{
1868+
return NFT_TRANS_GC_BATCHCOUNT - trans->count;
1869+
}
1870+
18661871
static inline void nft_ctx_update(struct nft_ctx *ctx,
18671872
const struct nft_trans *trans)
18681873
{

net/netfilter/nf_tables_api.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10493,11 +10493,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
1049310493
schedule_work(&trans_gc_work);
1049410494
}
1049510495

10496-
static int nft_trans_gc_space(struct nft_trans_gc *trans)
10497-
{
10498-
return NFT_TRANS_GC_BATCHCOUNT - trans->count;
10499-
}
10500-
1050110496
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
1050210497
unsigned int gc_seq, gfp_t gfp)
1050310498
{

net/netfilter/nft_set_pipapo.c

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,11 +1680,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
16801680
}
16811681

16821682
/**
1683-
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
1683+
* pipapo_gc_scan() - Drop expired entries from set and link them to gc list
16841684
* @set: nftables API set representation
16851685
* @m: Matching data
16861686
*/
1687-
static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
1687+
static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
16881688
{
16891689
struct nft_pipapo *priv = nft_set_priv(set);
16901690
struct net *net = read_pnet(&set->net);
@@ -1697,6 +1697,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
16971697
if (!gc)
16981698
return;
16991699

1700+
list_add(&gc->list, &priv->gc_head);
1701+
17001702
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
17011703
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
17021704
const struct nft_pipapo_field *f;
@@ -1724,9 +1726,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
17241726
* NFT_SET_ELEM_DEAD_BIT.
17251727
*/
17261728
if (__nft_set_elem_expired(&e->ext, tstamp)) {
1727-
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
1728-
if (!gc)
1729-
return;
1729+
if (!nft_trans_gc_space(gc)) {
1730+
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
1731+
if (!gc)
1732+
return;
1733+
1734+
list_add(&gc->list, &priv->gc_head);
1735+
}
17301736

17311737
nft_pipapo_gc_deactivate(net, set, e);
17321738
pipapo_drop(m, rulemap);
@@ -1740,10 +1746,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
17401746
}
17411747
}
17421748

1743-
gc = nft_trans_gc_catchall_sync(gc);
1749+
priv->last_gc = jiffies;
1750+
}
1751+
1752+
/**
1753+
* pipapo_gc_queue() - Free expired elements
1754+
* @set: nftables API set representation
1755+
*/
1756+
static void pipapo_gc_queue(struct nft_set *set)
1757+
{
1758+
struct nft_pipapo *priv = nft_set_priv(set);
1759+
struct nft_trans_gc *gc, *next;
1760+
1761+
/* always do a catchall cycle: */
1762+
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
17441763
if (gc) {
1764+
gc = nft_trans_gc_catchall_sync(gc);
1765+
if (gc)
1766+
nft_trans_gc_queue_sync_done(gc);
1767+
}
1768+
1769+
/* always purge queued gc elements. */
1770+
list_for_each_entry_safe(gc, next, &priv->gc_head, list) {
1771+
list_del(&gc->list);
17451772
nft_trans_gc_queue_sync_done(gc);
1746-
priv->last_gc = jiffies;
17471773
}
17481774
}
17491775

@@ -1797,6 +1823,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
17971823
*
17981824
* We also need to create a new working copy for subsequent insertions and
17991825
* deletions.
1826+
*
1827+
* After the live copy has been replaced by the clone, we can safely queue
1828+
* expired elements that have been collected by pipapo_gc_scan() for
1829+
* memory reclaim.
18001830
*/
18011831
static void nft_pipapo_commit(struct nft_set *set)
18021832
{
@@ -1807,14 +1837,16 @@ static void nft_pipapo_commit(struct nft_set *set)
18071837
return;
18081838

18091839
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
1810-
pipapo_gc(set, priv->clone);
1840+
pipapo_gc_scan(set, priv->clone);
18111841

18121842
old = rcu_replace_pointer(priv->match, priv->clone,
18131843
nft_pipapo_transaction_mutex_held(set));
18141844
priv->clone = NULL;
18151845

18161846
if (old)
18171847
call_rcu(&old->rcu, pipapo_reclaim_match);
1848+
1849+
pipapo_gc_queue(set);
18181850
}
18191851

18201852
static void nft_pipapo_abort(const struct nft_set *set)
@@ -2279,6 +2311,7 @@ static int nft_pipapo_init(const struct nft_set *set,
22792311
f->mt = NULL;
22802312
}
22812313

2314+
INIT_LIST_HEAD(&priv->gc_head);
22822315
rcu_assign_pointer(priv->match, m);
22832316

22842317
return 0;
@@ -2328,6 +2361,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
23282361
struct nft_pipapo *priv = nft_set_priv(set);
23292362
struct nft_pipapo_match *m;
23302363

2364+
WARN_ON_ONCE(!list_empty(&priv->gc_head));
2365+
23312366
m = rcu_dereference_protected(priv->match, true);
23322367

23332368
if (priv->clone) {

net/netfilter/nft_set_pipapo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,14 @@ struct nft_pipapo_match {
156156
* @clone: Copy where pending insertions and deletions are kept
157157
* @width: Total bytes to be matched for one packet, including padding
158158
* @last_gc: Timestamp of last garbage collection run, jiffies
159+
* @gc_head: list of nft_trans_gc to queue up for mem reclaim
159160
*/
160161
struct nft_pipapo {
161162
struct nft_pipapo_match __rcu *match;
162163
struct nft_pipapo_match *clone;
163164
int width;
164165
unsigned long last_gc;
166+
struct list_head gc_head;
165167
};
166168

167169
struct nft_pipapo_elem;

0 commit comments

Comments
 (0)