Skip to content

Commit 35ef1f0

Browse files
yangdongshengkawasaki
authored and committed
dm-pcache: add cache_writeback
Introduce cache_writeback.c, which implements the asynchronous write-back path for pcache. The new file is responsible for detecting dirty data, organising it into an in-memory tree, issuing bios to the backing block device, and advancing the cache’s *dirty tail* pointer once data has been safely persisted. * Dirty-state detection - `__is_cache_clean()` reads the kset header at `dirty_tail`, checks magic and CRC, and thus decides whether there is anything to flush. * Write-back scheduler - `cache_writeback_work` is queued on the cache task-workqueue and re-arms itself at `PCACHE_CACHE_WRITEBACK_INTERVAL`. - Uses an internal spin-protected `writeback_key_tree` to batch keys belonging to the same stripe before IO. * Key processing - `cache_kset_insert_tree()` decodes each key inside the on-media kset, allocates an in-memory key object, and inserts it into the writeback_key_tree. - `cache_key_writeback()` builds a *KMEM-type* backing request that maps the persistent-memory range directly into a WRITE bio and submits it with `submit_bio_noacct()`. - After all keys from the writeback_key_tree have been flushed, `backing_dev_flush()` issues a single FLUSH to ensure durability. * Tail advancement - Once a kset is written back, `cache_pos_advance()` moves `cache->dirty_tail` by the exact on-disk size and the new position is persisted via `cache_encode_dirty_tail()`. - When the `PCACHE_KSET_FLAGS_LAST` flag is seen, the write-back engine switches to the next segment indicated by `next_cache_seg_id`. Signed-off-by: Dongsheng Yang <[email protected]>
1 parent 1501ec6 commit 35ef1f0

1 file changed

Lines changed: 279 additions & 0 deletions

File tree

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
3+
#include <linux/bio.h>
4+
5+
#include "cache.h"
6+
#include "backing_dev.h"
7+
#include "cache_dev.h"
8+
#include "dm_pcache.h"
9+
10+
/*
 * Complete one request of the current writeback batch; when the last
 * pending request finishes without error, flush the backing device and
 * advance the cache's dirty tail past the written-back kset.
 *
 * Both call sites (writeback_end_req() and cache_wb_tree_writeback())
 * hold cache->writeback_lock when calling this.
 */
static void writeback_ctx_end(struct pcache_cache *cache, int ret)
{
	/* Remember only the first error seen for this batch. */
	if (ret && !cache->writeback_ctx.ret) {
		pcache_dev_err(CACHE_TO_PCACHE(cache), "writeback error: %d", ret);
		cache->writeback_ctx.ret = ret;
	}

	/* Not the last completion of the batch: nothing more to do yet. */
	if (!atomic_dec_and_test(&cache->writeback_ctx.pending))
		return;

	if (!cache->writeback_ctx.ret) {
		/* Make the data durable before publishing the new dirty_tail. */
		backing_dev_flush(cache->backing_dev);

		mutex_lock(&cache->dirty_tail_lock);
		cache_pos_advance(&cache->dirty_tail, cache->writeback_ctx.advance);
		cache_encode_dirty_tail(cache);
		mutex_unlock(&cache->dirty_tail_lock);
	}
	/* Re-arm immediately: more dirty ksets may be waiting. */
	queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, 0);
}
30+
31+
/*
 * Completion callback for a single writeback backing request.
 * Takes cache->writeback_lock so the batch bookkeeping in
 * writeback_ctx_end() is serialized against the writeback work.
 */
static void writeback_end_req(struct pcache_backing_dev_req *backing_req, int ret)
{
	struct pcache_cache *cache = backing_req->priv_data;

	mutex_lock(&cache->writeback_lock);
	writeback_ctx_end(cache, ret);
	mutex_unlock(&cache->writeback_lock);
}
39+
40+
static inline bool is_cache_clean(struct pcache_cache *cache, struct pcache_cache_pos *dirty_tail)
41+
{
42+
struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
43+
struct pcache_cache_kset_onmedia *kset_onmedia;
44+
u32 to_copy;
45+
void *addr;
46+
int ret;
47+
48+
addr = cache_pos_addr(dirty_tail);
49+
kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->wb_kset_onmedia_buf;
50+
51+
to_copy = min(PCACHE_KSET_ONMEDIA_SIZE_MAX, PCACHE_SEG_SIZE - dirty_tail->seg_off);
52+
ret = copy_mc_to_kernel(kset_onmedia, addr, to_copy);
53+
if (ret) {
54+
pcache_dev_err(pcache, "error to read kset: %d", ret);
55+
return true;
56+
}
57+
58+
/* Check if the magic number matches the expected value */
59+
if (kset_onmedia->magic != PCACHE_KSET_MAGIC) {
60+
pcache_dev_debug(pcache, "dirty_tail: %u:%u magic: %llx, not expected: %llx\n",
61+
dirty_tail->cache_seg->cache_seg_id, dirty_tail->seg_off,
62+
kset_onmedia->magic, PCACHE_KSET_MAGIC);
63+
return true;
64+
}
65+
66+
/* Verify the CRC checksum for data integrity */
67+
if (kset_onmedia->crc != cache_kset_crc(kset_onmedia)) {
68+
pcache_dev_debug(pcache, "dirty_tail: %u:%u crc: %x, not expected: %x\n",
69+
dirty_tail->cache_seg->cache_seg_id, dirty_tail->seg_off,
70+
cache_kset_crc(kset_onmedia), kset_onmedia->crc);
71+
return true;
72+
}
73+
74+
return false;
75+
}
76+
77+
/*
 * Tear down the writeback path: synchronously cancel the delayed work
 * (waiting for a running instance to finish) before releasing the
 * in-memory writeback key tree it uses.
 */
void cache_writeback_exit(struct pcache_cache *cache)
{
	cancel_delayed_work_sync(&cache->writeback_work);
	cache_tree_exit(&cache->writeback_key_tree);
}
82+
83+
int cache_writeback_init(struct pcache_cache *cache)
84+
{
85+
int ret;
86+
87+
ret = cache_tree_init(cache, &cache->writeback_key_tree, 1);
88+
if (ret)
89+
goto err;
90+
91+
atomic_set(&cache->writeback_ctx.pending, 0);
92+
93+
/* Queue delayed work to start writeback handling */
94+
queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, 0);
95+
96+
return 0;
97+
err:
98+
return ret;
99+
}
100+
101+
static int cache_key_writeback(struct pcache_cache *cache, struct pcache_cache_key *key)
102+
{
103+
struct pcache_backing_dev_req *writeback_req;
104+
struct pcache_backing_dev_req_opts writeback_req_opts = { 0 };
105+
struct pcache_cache_pos *pos;
106+
void *addr;
107+
u32 seg_remain;
108+
u64 off;
109+
110+
if (cache_key_clean(key))
111+
return 0;
112+
113+
pos = &key->cache_pos;
114+
115+
seg_remain = cache_seg_remain(pos);
116+
BUG_ON(seg_remain < key->len);
117+
118+
addr = cache_pos_addr(pos);
119+
off = key->off;
120+
121+
writeback_req_opts.type = BACKING_DEV_REQ_TYPE_KMEM;
122+
writeback_req_opts.end_fn = writeback_end_req;
123+
writeback_req_opts.priv_data = cache;
124+
125+
writeback_req_opts.kmem.data = addr;
126+
writeback_req_opts.kmem.opf = REQ_OP_WRITE;
127+
writeback_req_opts.kmem.len = key->len;
128+
writeback_req_opts.kmem.backing_off = off;
129+
130+
writeback_req = backing_dev_req_create(cache->backing_dev, &writeback_req_opts);
131+
if (!writeback_req)
132+
return -EIO;
133+
134+
atomic_inc(&cache->writeback_ctx.pending);
135+
backing_dev_req_submit(writeback_req, true);
136+
137+
return 0;
138+
}
139+
140+
/*
 * Flush every key currently held in the writeback_key_tree to the
 * backing device.
 *
 * @advance: on-media size of the kset being written back; once all
 *           submitted requests complete successfully, dirty_tail is
 *           advanced by this amount in writeback_ctx_end().
 *
 * The pending counter starts at 1 (a bias) so that completions arriving
 * while we are still submitting cannot prematurely trigger the
 * flush/advance step; the final writeback_ctx_end() call below drops
 * that bias.
 *
 * Called with cache->writeback_lock held.
 */
static int cache_wb_tree_writeback(struct pcache_cache *cache, u32 advance)
{
	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
	struct pcache_cache_tree *cache_tree = &cache->writeback_key_tree;
	struct pcache_cache_subtree *cache_subtree;
	struct rb_node *node;
	struct pcache_cache_key *key;
	int ret = 0;
	u32 i;

	cache->writeback_ctx.ret = 0;
	cache->writeback_ctx.advance = advance;
	atomic_set(&cache->writeback_ctx.pending, 1);

	/* Walk each subtree, submitting one request per dirty key. */
	for (i = 0; i < cache_tree->n_subtrees; i++) {
		cache_subtree = &cache_tree->subtrees[i];

		node = rb_first(&cache_subtree->root);
		while (node) {
			key = CACHE_KEY(node);
			/* Advance before deleting the current node. */
			node = rb_next(node);

			ret = cache_key_writeback(cache, key);
			if (ret) {
				pcache_dev_err(pcache, "writeback error: %d\n", ret);
				goto release;
			}

			cache_key_delete(key);
		}
	}
release:
	/* Drop the initial bias; may flush and advance dirty_tail. */
	writeback_ctx_end(cache, ret);

	return ret;
}
176+
177+
static int cache_kset_insert_tree(struct pcache_cache *cache, struct pcache_cache_kset_onmedia *kset_onmedia)
178+
{
179+
struct pcache_cache_key_onmedia *key_onmedia;
180+
struct pcache_cache_subtree *cache_subtree;
181+
struct pcache_cache_key *key;
182+
int ret;
183+
u32 i;
184+
185+
/* Iterate through all keys in the kset and write each back to storage */
186+
for (i = 0; i < kset_onmedia->key_num; i++) {
187+
key_onmedia = &kset_onmedia->data[i];
188+
189+
key = cache_key_alloc(&cache->writeback_key_tree);
190+
if (!key)
191+
return -ENOMEM;
192+
193+
ret = cache_key_decode(cache, key_onmedia, key);
194+
if (ret) {
195+
cache_key_put(key);
196+
return ret;
197+
}
198+
199+
cache_subtree = get_subtree(&cache->writeback_key_tree, key->off);
200+
spin_lock(&cache_subtree->tree_lock);
201+
ret = cache_key_insert(&cache->writeback_key_tree, key, true);
202+
spin_unlock(&cache_subtree->tree_lock);
203+
if (ret) {
204+
cache_key_put(key);
205+
return ret;
206+
}
207+
}
208+
209+
return 0;
210+
}
211+
212+
/*
 * Handle a kset flagged PCACHE_KSET_FLAGS_LAST: the current segment
 * holds no further ksets, so move dirty_tail to the start of the
 * segment named by next_cache_seg_id and persist the new position.
 */
static void last_kset_writeback(struct pcache_cache *cache,
			struct pcache_cache_kset_onmedia *last_kset_onmedia)
{
	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
	struct pcache_cache_segment *next_seg;

	pcache_dev_debug(pcache, "last kset, next: %u\n", last_kset_onmedia->next_cache_seg_id);

	next_seg = &cache->segments[last_kset_onmedia->next_cache_seg_id];

	mutex_lock(&cache->dirty_tail_lock);
	cache->dirty_tail.cache_seg = next_seg;
	cache->dirty_tail.seg_off = 0;
	cache_encode_dirty_tail(cache);
	mutex_unlock(&cache->dirty_tail_lock);
}
228+
229+
/*
 * Delayed-work handler driving the writeback state machine.
 *
 * One iteration handles at most the single kset at dirty_tail:
 *  - clean cache (no valid kset there): re-check after the writeback
 *    interval;
 *  - "last" kset: move dirty_tail to the next cache segment and re-run
 *    immediately;
 *  - normal kset: decode its keys into writeback_key_tree and submit
 *    the writeback IOs via cache_wb_tree_writeback().
 *
 * Requeueing with delay 0 keeps draining consecutive ksets. If a
 * previous batch is still in flight (pending != 0) we simply return:
 * writeback_ctx_end() re-queues this work when the batch completes.
 */
void cache_writeback_fn(struct work_struct *work)
{
	struct pcache_cache *cache = container_of(work, struct pcache_cache, writeback_work.work);
	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
	struct pcache_cache_pos dirty_tail;
	struct pcache_cache_kset_onmedia *kset_onmedia;
	u32 delay;
	int ret;

	mutex_lock(&cache->writeback_lock);
	/* Previous batch still in flight; its completion re-arms us. */
	if (atomic_read(&cache->writeback_ctx.pending))
		goto unlock;

	if (pcache_is_stopping(pcache))
		goto unlock;

	/* Buffer is filled by is_cache_clean() below. */
	kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->wb_kset_onmedia_buf;

	/* Take a stable snapshot of dirty_tail to work on. */
	mutex_lock(&cache->dirty_tail_lock);
	cache_pos_copy(&dirty_tail, &cache->dirty_tail);
	mutex_unlock(&cache->dirty_tail_lock);

	if (is_cache_clean(cache, &dirty_tail)) {
		delay = PCACHE_CACHE_WRITEBACK_INTERVAL;
		goto queue_work;
	}

	if (kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST) {
		/* Segment exhausted: switch dirty_tail to the next segment. */
		last_kset_writeback(cache, kset_onmedia);
		delay = 0;
		goto queue_work;
	}

	ret = cache_kset_insert_tree(cache, kset_onmedia);
	if (ret) {
		delay = PCACHE_CACHE_WRITEBACK_INTERVAL;
		goto queue_work;
	}

	ret = cache_wb_tree_writeback(cache, get_kset_onmedia_size(kset_onmedia));
	if (ret) {
		delay = PCACHE_CACHE_WRITEBACK_INTERVAL;
		goto queue_work;
	}

	delay = 0;
queue_work:
	queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, delay);
unlock:
	mutex_unlock(&cache->writeback_lock);
}

0 commit comments

Comments
 (0)