Skip to content

Commit 5c5e419

Browse files
Philipp Reisner
authored and kawasaki committed
drbd: Remove the open-coded page pool
If the network stack keeps a reference for too long, DRBD keeps references on a higher number of pages as a consequence. Fix all that by no longer relying on page reference counts dropping to an expected value. Instead, DRBD gives up its reference and lets the system handle everything else. While at it, remove the open-coded custom page pool mechanism and use the page_pool included in the kernel. Signed-off-by: Philipp Reisner <[email protected]> Signed-off-by: Christoph Böhmwalder <[email protected]>
1 parent 3d460ec commit 5c5e419

4 files changed

Lines changed: 71 additions & 347 deletions

File tree

drivers/block/drbd/drbd_int.h

Lines changed: 6 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -380,6 +380,9 @@ enum {
380380
/* this is/was a write request */
381381
__EE_WRITE,
382382

383+
/* hand back using mempool_free(e, drbd_buffer_page_pool) */
384+
__EE_RELEASE_TO_MEMPOOL,
385+
383386
/* this is/was a write same request */
384387
__EE_WRITE_SAME,
385388

@@ -402,6 +405,7 @@ enum {
402405
#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE)
403406
#define EE_SUBMITTED (1<<__EE_SUBMITTED)
404407
#define EE_WRITE (1<<__EE_WRITE)
408+
#define EE_RELEASE_TO_MEMPOOL (1<<__EE_RELEASE_TO_MEMPOOL)
405409
#define EE_WRITE_SAME (1<<__EE_WRITE_SAME)
406410
#define EE_APPLICATION (1<<__EE_APPLICATION)
407411
#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ)
@@ -858,7 +862,6 @@ struct drbd_device {
858862
struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
859863
struct list_head done_ee; /* need to send P_WRITE_ACK */
860864
struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */
861-
struct list_head net_ee; /* zero-copy network send in progress */
862865

863866
struct list_head resync_reads;
864867
atomic_t pp_in_use; /* allocated from page pool */
@@ -1329,31 +1332,14 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
13291332
extern mempool_t drbd_request_mempool;
13301333
extern mempool_t drbd_ee_mempool;
13311334

1332-
/* drbd's page pool, used to buffer data received from the peer,
1333-
* or data requested by the peer.
1334-
*
1335-
* This does not have an emergency reserve.
1336-
*
1337-
* When allocating from this pool, it first takes pages from the pool.
1338-
* Only if the pool is depleted will try to allocate from the system.
1339-
*
1340-
* The assumption is that pages taken from this pool will be processed,
1341-
* and given back, "quickly", and then can be recycled, so we can avoid
1342-
* frequent calls to alloc_page(), and still will be able to make progress even
1343-
* under memory pressure.
1344-
*/
1345-
extern struct page *drbd_pp_pool;
1346-
extern spinlock_t drbd_pp_lock;
1347-
extern int drbd_pp_vacant;
1348-
extern wait_queue_head_t drbd_pp_wait;
1349-
13501335
/* We also need a standard (emergency-reserve backed) page pool
13511336
* for meta data IO (activity log, bitmap).
13521337
* We can keep it global, as long as it is used as "N pages at a time".
13531338
* 128 should be plenty, currently we probably can get away with as few as 1.
13541339
*/
13551340
#define DRBD_MIN_POOL_PAGES 128
13561341
extern mempool_t drbd_md_io_page_pool;
1342+
extern mempool_t drbd_buffer_page_pool;
13571343

13581344
/* We also need to make sure we get a bio
13591345
* when we need it for housekeeping purposes */
@@ -1488,10 +1474,7 @@ extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *,
14881474
sector_t, unsigned int,
14891475
unsigned int,
14901476
gfp_t) __must_hold(local);
1491-
extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
1492-
int);
1493-
#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0)
1494-
#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1)
1477+
extern void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *req);
14951478
extern struct page *drbd_alloc_pages(struct drbd_peer_device *, unsigned int, bool);
14961479
extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
14971480
extern int drbd_connected(struct drbd_peer_device *);
@@ -1610,16 +1593,6 @@ static inline struct page *page_chain_next(struct page *page)
16101593
for (; page && ({ n = page_chain_next(page); 1; }); page = n)
16111594

16121595

1613-
static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req)
1614-
{
1615-
struct page *page = peer_req->pages;
1616-
page_chain_for_each(page) {
1617-
if (page_count(page) > 1)
1618-
return 1;
1619-
}
1620-
return 0;
1621-
}
1622-
16231596
static inline union drbd_state drbd_read_state(struct drbd_device *device)
16241597
{
16251598
struct drbd_resource *resource = device->resource;

drivers/block/drbd/drbd_main.c

Lines changed: 15 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -114,20 +114,10 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
114114
mempool_t drbd_request_mempool;
115115
mempool_t drbd_ee_mempool;
116116
mempool_t drbd_md_io_page_pool;
117+
mempool_t drbd_buffer_page_pool;
117118
struct bio_set drbd_md_io_bio_set;
118119
struct bio_set drbd_io_bio_set;
119120

120-
/* I do not use a standard mempool, because:
121-
1) I want to hand out the pre-allocated objects first.
122-
2) I want to be able to interrupt sleeping allocation with a signal.
123-
Note: This is a single linked list, the next pointer is the private
124-
member of struct page.
125-
*/
126-
struct page *drbd_pp_pool;
127-
DEFINE_SPINLOCK(drbd_pp_lock);
128-
int drbd_pp_vacant;
129-
wait_queue_head_t drbd_pp_wait;
130-
131121
DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
132122

133123
static const struct block_device_operations drbd_ops = {
@@ -1611,6 +1601,7 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b
16111601
static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device,
16121602
struct drbd_peer_request *peer_req)
16131603
{
1604+
bool use_sendpage = !(peer_req->flags & EE_RELEASE_TO_MEMPOOL);
16141605
struct page *page = peer_req->pages;
16151606
unsigned len = peer_req->i.size;
16161607
int err;
@@ -1619,8 +1610,13 @@ static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device,
16191610
page_chain_for_each(page) {
16201611
unsigned l = min_t(unsigned, len, PAGE_SIZE);
16211612

1622-
err = _drbd_send_page(peer_device, page, 0, l,
1623-
page_chain_next(page) ? MSG_MORE : 0);
1613+
if (likely(use_sendpage))
1614+
err = _drbd_send_page(peer_device, page, 0, l,
1615+
page_chain_next(page) ? MSG_MORE : 0);
1616+
else
1617+
err = _drbd_no_send_page(peer_device, page, 0, l,
1618+
page_chain_next(page) ? MSG_MORE : 0);
1619+
16241620
if (err)
16251621
return err;
16261622
len -= l;
@@ -1962,7 +1958,6 @@ void drbd_init_set_defaults(struct drbd_device *device)
19621958
INIT_LIST_HEAD(&device->sync_ee);
19631959
INIT_LIST_HEAD(&device->done_ee);
19641960
INIT_LIST_HEAD(&device->read_ee);
1965-
INIT_LIST_HEAD(&device->net_ee);
19661961
INIT_LIST_HEAD(&device->resync_reads);
19671962
INIT_LIST_HEAD(&device->resync_work.list);
19681963
INIT_LIST_HEAD(&device->unplug_work.list);
@@ -2043,7 +2038,6 @@ void drbd_device_cleanup(struct drbd_device *device)
20432038
D_ASSERT(device, list_empty(&device->sync_ee));
20442039
D_ASSERT(device, list_empty(&device->done_ee));
20452040
D_ASSERT(device, list_empty(&device->read_ee));
2046-
D_ASSERT(device, list_empty(&device->net_ee));
20472041
D_ASSERT(device, list_empty(&device->resync_reads));
20482042
D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q));
20492043
D_ASSERT(device, list_empty(&device->resync_work.list));
@@ -2055,19 +2049,11 @@ void drbd_device_cleanup(struct drbd_device *device)
20552049

20562050
static void drbd_destroy_mempools(void)
20572051
{
2058-
struct page *page;
2059-
2060-
while (drbd_pp_pool) {
2061-
page = drbd_pp_pool;
2062-
drbd_pp_pool = (struct page *)page_private(page);
2063-
__free_page(page);
2064-
drbd_pp_vacant--;
2065-
}
2066-
20672052
/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
20682053

20692054
bioset_exit(&drbd_io_bio_set);
20702055
bioset_exit(&drbd_md_io_bio_set);
2056+
mempool_exit(&drbd_buffer_page_pool);
20712057
mempool_exit(&drbd_md_io_page_pool);
20722058
mempool_exit(&drbd_ee_mempool);
20732059
mempool_exit(&drbd_request_mempool);
@@ -2086,9 +2072,8 @@ static void drbd_destroy_mempools(void)
20862072

20872073
static int drbd_create_mempools(void)
20882074
{
2089-
struct page *page;
20902075
const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
2091-
int i, ret;
2076+
int ret;
20922077

20932078
/* caches */
20942079
drbd_request_cache = kmem_cache_create(
@@ -2125,6 +2110,10 @@ static int drbd_create_mempools(void)
21252110
if (ret)
21262111
goto Enomem;
21272112

2113+
ret = mempool_init_page_pool(&drbd_buffer_page_pool, number, 0);
2114+
if (ret)
2115+
goto Enomem;
2116+
21282117
ret = mempool_init_slab_pool(&drbd_request_mempool, number,
21292118
drbd_request_cache);
21302119
if (ret)
@@ -2134,15 +2123,6 @@ static int drbd_create_mempools(void)
21342123
if (ret)
21352124
goto Enomem;
21362125

2137-
for (i = 0; i < number; i++) {
2138-
page = alloc_page(GFP_HIGHUSER);
2139-
if (!page)
2140-
goto Enomem;
2141-
set_page_private(page, (unsigned long)drbd_pp_pool);
2142-
drbd_pp_pool = page;
2143-
}
2144-
drbd_pp_vacant = number;
2145-
21462126
return 0;
21472127

21482128
Enomem:
@@ -2169,10 +2149,6 @@ static void drbd_release_all_peer_reqs(struct drbd_device *device)
21692149
rr = drbd_free_peer_reqs(device, &device->done_ee);
21702150
if (rr)
21712151
drbd_err(device, "%d EEs in done list found!\n", rr);
2172-
2173-
rr = drbd_free_peer_reqs(device, &device->net_ee);
2174-
if (rr)
2175-
drbd_err(device, "%d EEs in net list found!\n", rr);
21762152
}
21772153

21782154
/* caution. no locking. */
@@ -2863,11 +2839,6 @@ static int __init drbd_init(void)
28632839
return err;
28642840
}
28652841

2866-
/*
2867-
* allocate all necessary structs
2868-
*/
2869-
init_waitqueue_head(&drbd_pp_wait);
2870-
28712842
drbd_proc = NULL; /* play safe for drbd_cleanup */
28722843
idr_init(&drbd_devices);
28732844

0 commit comments

Comments (0)