Skip to content

Commit 723ffbd

Browse files
yangdongshengkawasaki
authored and committed
dm-pcache: add cache_writeback
Introduce cache_writeback.c, which implements the asynchronous write-back path for pcache. The new file is responsible for detecting dirty data, organising it into an in-memory tree, issuing bios to the backing block device, and advancing the cache’s *dirty tail* pointer once data has been safely persisted. * Dirty-state detection - `__is_cache_clean()` reads the kset header at `dirty_tail`, checks magic and CRC, and thus decides whether there is anything to flush. * Write-back scheduler - `cache_writeback_work` is queued on the cache task-workqueue and re-arms itself at `PCACHE_CACHE_WRITEBACK_INTERVAL`. - Uses an internal spin-protected `writeback_key_tree` to batch keys belonging to the same stripe before IO. * Key processing - `cache_kset_insert_tree()` decodes each key inside the on-media kset, allocates an in-memory key object, and inserts it into the writeback_key_tree. - `cache_key_writeback()` builds a *KMEM-type* backing request that maps the persistent-memory range directly into a WRITE bio and submits it with `submit_bio_noacct()`. - After all keys from the writeback_key_tree have been flushed, `backing_dev_flush()` issues a single FLUSH to ensure durability. * Tail advancement - Once a kset is written back, `cache_pos_advance()` moves `cache->dirty_tail` by the exact on-disk size and the new position is persisted via `cache_encode_dirty_tail()`. - When the `PCACHE_KSET_FLAGS_LAST` flag is seen, the write-back engine switches to the next segment indicated by `next_cache_seg_id`. Signed-off-by: Dongsheng Yang <[email protected]>
1 parent 817b706 commit 723ffbd

1 file changed

Lines changed: 239 additions & 0 deletions

File tree

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
3+
#include <linux/bio.h>
4+
5+
#include "cache.h"
6+
#include "backing_dev.h"
7+
#include "cache_dev.h"
8+
#include "dm_pcache.h"
9+
10+
static inline bool is_cache_clean(struct pcache_cache *cache, struct pcache_cache_pos *dirty_tail)
11+
{
12+
struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
13+
struct pcache_cache_kset_onmedia *kset_onmedia;
14+
u32 to_copy;
15+
void *addr;
16+
int ret;
17+
18+
addr = cache_pos_addr(dirty_tail);
19+
kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->wb_kset_onmedia_buf;
20+
21+
to_copy = min(PCACHE_KSET_ONMEDIA_SIZE_MAX, PCACHE_SEG_SIZE - dirty_tail->seg_off);
22+
ret = copy_mc_to_kernel(kset_onmedia, addr, to_copy);
23+
if (ret) {
24+
pcache_dev_err(pcache, "error to read kset: %d", ret);
25+
return true;
26+
}
27+
28+
/* Check if the magic number matches the expected value */
29+
if (kset_onmedia->magic != PCACHE_KSET_MAGIC) {
30+
pcache_dev_debug(pcache, "dirty_tail: %u:%u magic: %llx, not expected: %llx\n",
31+
dirty_tail->cache_seg->cache_seg_id, dirty_tail->seg_off,
32+
kset_onmedia->magic, PCACHE_KSET_MAGIC);
33+
return true;
34+
}
35+
36+
/* Verify the CRC checksum for data integrity */
37+
if (kset_onmedia->crc != cache_kset_crc(kset_onmedia)) {
38+
pcache_dev_debug(pcache, "dirty_tail: %u:%u crc: %x, not expected: %x\n",
39+
dirty_tail->cache_seg->cache_seg_id, dirty_tail->seg_off,
40+
cache_kset_crc(kset_onmedia), kset_onmedia->crc);
41+
return true;
42+
}
43+
44+
return false;
45+
}
46+
47+
void cache_writeback_exit(struct pcache_cache *cache)
48+
{
49+
cancel_delayed_work_sync(&cache->writeback_work);
50+
cache_tree_exit(&cache->writeback_key_tree);
51+
}
52+
53+
int cache_writeback_init(struct pcache_cache *cache)
54+
{
55+
int ret;
56+
57+
ret = cache_tree_init(cache, &cache->writeback_key_tree, 1);
58+
if (ret)
59+
goto err;
60+
61+
/* Queue delayed work to start writeback handling */
62+
queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, 0);
63+
64+
return 0;
65+
err:
66+
return ret;
67+
}
68+
69+
static int cache_key_writeback(struct pcache_cache *cache, struct pcache_cache_key *key)
70+
{
71+
struct pcache_backing_dev_req *writeback_req;
72+
struct pcache_backing_dev_req_opts writeback_req_opts = { 0 };
73+
struct pcache_cache_pos *pos;
74+
void *addr;
75+
u32 seg_remain;
76+
u64 off;
77+
78+
if (cache_key_clean(key))
79+
return 0;
80+
81+
pos = &key->cache_pos;
82+
83+
seg_remain = cache_seg_remain(pos);
84+
BUG_ON(seg_remain < key->len);
85+
86+
addr = cache_pos_addr(pos);
87+
off = key->off;
88+
89+
writeback_req_opts.type = BACKING_DEV_REQ_TYPE_KMEM;
90+
writeback_req_opts.end_fn = NULL;
91+
writeback_req_opts.gfp_mask = GFP_NOIO;
92+
93+
writeback_req_opts.kmem.data = addr;
94+
writeback_req_opts.kmem.opf = REQ_OP_WRITE;
95+
writeback_req_opts.kmem.len = key->len;
96+
writeback_req_opts.kmem.backing_off = off;
97+
98+
writeback_req = backing_dev_req_create(cache->backing_dev, &writeback_req_opts);
99+
if (!writeback_req)
100+
return -EIO;
101+
102+
backing_dev_req_submit(writeback_req, true);
103+
104+
return 0;
105+
}
106+
107+
static int cache_wb_tree_writeback(struct pcache_cache *cache)
108+
{
109+
struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
110+
struct pcache_cache_tree *cache_tree = &cache->writeback_key_tree;
111+
struct pcache_cache_subtree *cache_subtree;
112+
struct rb_node *node;
113+
struct pcache_cache_key *key;
114+
int ret;
115+
u32 i;
116+
117+
for (i = 0; i < cache_tree->n_subtrees; i++) {
118+
cache_subtree = &cache_tree->subtrees[i];
119+
120+
node = rb_first(&cache_subtree->root);
121+
while (node) {
122+
key = CACHE_KEY(node);
123+
node = rb_next(node);
124+
125+
ret = cache_key_writeback(cache, key);
126+
if (ret) {
127+
pcache_dev_err(pcache, "writeback error: %d\n", ret);
128+
return ret;
129+
}
130+
131+
cache_key_delete(key);
132+
}
133+
}
134+
135+
backing_dev_flush(cache->backing_dev);
136+
137+
return 0;
138+
}
139+
140+
static int cache_kset_insert_tree(struct pcache_cache *cache, struct pcache_cache_kset_onmedia *kset_onmedia)
141+
{
142+
struct pcache_cache_key_onmedia *key_onmedia;
143+
struct pcache_cache_key *key;
144+
int ret;
145+
u32 i;
146+
147+
/* Iterate through all keys in the kset and write each back to storage */
148+
for (i = 0; i < kset_onmedia->key_num; i++) {
149+
key_onmedia = &kset_onmedia->data[i];
150+
151+
key = cache_key_alloc(&cache->writeback_key_tree);
152+
if (!key)
153+
return -ENOMEM;
154+
155+
ret = cache_key_decode(cache, key_onmedia, key);
156+
if (ret) {
157+
cache_key_delete(key);
158+
return ret;
159+
}
160+
161+
ret = cache_key_insert(&cache->writeback_key_tree, key, true);
162+
if (ret) {
163+
cache_key_delete(key);
164+
return ret;
165+
}
166+
}
167+
168+
return 0;
169+
}
170+
171+
static void last_kset_writeback(struct pcache_cache *cache,
172+
struct pcache_cache_kset_onmedia *last_kset_onmedia)
173+
{
174+
struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
175+
struct pcache_cache_segment *next_seg;
176+
177+
pcache_dev_debug(pcache, "last kset, next: %u\n", last_kset_onmedia->next_cache_seg_id);
178+
179+
next_seg = &cache->segments[last_kset_onmedia->next_cache_seg_id];
180+
181+
mutex_lock(&cache->dirty_tail_lock);
182+
cache->dirty_tail.cache_seg = next_seg;
183+
cache->dirty_tail.seg_off = 0;
184+
cache_encode_dirty_tail(cache);
185+
mutex_unlock(&cache->dirty_tail_lock);
186+
}
187+
188+
/*
 * cache_writeback_fn - delayed-work handler driving the writeback loop.
 *
 * Repeatedly: snapshot the dirty tail, validate the kset found there, stage
 * its keys into the writeback tree, flush them to the backing device, and
 * advance the persisted dirty tail by the kset's on-media size.  The loop
 * exits when the cache is clean, on error, or when the device is stopping;
 * on normal exit it re-arms itself after PCACHE_CACHE_WRITEBACK_INTERVAL.
 *
 * Locking: writeback_lock serializes the whole pass; dirty_tail_lock guards
 * each read/update of cache->dirty_tail.
 */
void cache_writeback_fn(struct work_struct *work)
{
	struct pcache_cache *cache = container_of(work, struct pcache_cache, writeback_work.work);
	struct dm_pcache *pcache = CACHE_TO_PCACHE(cache);
	struct pcache_cache_pos dirty_tail;
	struct pcache_cache_kset_onmedia *kset_onmedia;
	int ret = 0;

	mutex_lock(&cache->writeback_lock);
	/*
	 * kset_onmedia aliases wb_kset_onmedia_buf: is_cache_clean() below
	 * copies and validates the on-media kset into this buffer, so after a
	 * false return the buffer holds the kset to process.
	 */
	kset_onmedia = (struct pcache_cache_kset_onmedia *)cache->wb_kset_onmedia_buf;
	/* Loop until all dirty data is written back and the cache is clean */
	while (true) {
		/* Bail out early on teardown; do NOT re-arm the work. */
		if (pcache_is_stopping(pcache)) {
			mutex_unlock(&cache->writeback_lock);
			return;
		}

		/* Get new dirty tail */
		mutex_lock(&cache->dirty_tail_lock);
		cache_pos_copy(&dirty_tail, &cache->dirty_tail);
		mutex_unlock(&cache->dirty_tail_lock);

		if (is_cache_clean(cache, &dirty_tail))
			break;

		/* A "last" kset only redirects the tail to the next segment. */
		if (kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST) {
			last_kset_writeback(cache, kset_onmedia);
			continue;
		}

		/* Stage the kset's keys, then flush them as one batch. */
		ret = cache_kset_insert_tree(cache, kset_onmedia);
		if (ret)
			break;

		ret = cache_wb_tree_writeback(cache);
		if (ret)
			break;

		pcache_dev_debug(pcache, "writeback advance: %u:%u %u\n",
				 dirty_tail.cache_seg->cache_seg_id,
				 dirty_tail.seg_off,
				 get_kset_onmedia_size(kset_onmedia));

		/* Data is durable; advance and persist the dirty tail. */
		mutex_lock(&cache->dirty_tail_lock);
		cache_pos_advance(&cache->dirty_tail, get_kset_onmedia_size(kset_onmedia));
		cache_encode_dirty_tail(cache);
		mutex_unlock(&cache->dirty_tail_lock);
	}
	mutex_unlock(&cache->writeback_lock);

	/* Re-arm: the work polls for new dirty data at a fixed interval. */
	queue_delayed_work(cache_get_wq(cache), &cache->writeback_work, PCACHE_CACHE_WRITEBACK_INTERVAL);
}

0 commit comments

Comments
 (0)