
Commit 49affb5

yangdongshengkawasaki authored and committed
dm-pcache: add cache device
Add cache_dev.{c,h} to manage the persistent-memory device that stores
all pcache metadata and data segments. Splitting this logic out keeps
the main dm-pcache code focused on policy while cache_dev handles the
low-level interaction with the DAX block device.

* DAX mapping
  - Opens the underlying device via dm_get_device().
  - Uses dax_direct_access() to obtain a direct linear mapping; falls
    back to vmap() when the range is fragmented.

* On-disk layout

      ┌─ 4 KB ─┐ super-block (SB)
      ├─ 4 KB ─┤ cache_info[0]
      ├─ 4 KB ─┤ cache_info[1]
      ├─ 4 KB ─┤ cache_ctrl
      └─ ...  ─┘ segments

  Constants and macros in the header expose the offsets and sizes.

* Super-block handling
  - sb_read(), sb_validate(), sb_init() verify magic, CRC32 and host
    endianness (flag PCACHE_SB_F_BIGENDIAN).
  - Formatting zeroes the metadata replicas and initialises the
    segment bitmap when the SB is blank.

* Segment allocator
  - Bitmap protected by seg_lock; find_next_zero_bit() yields the
    next free 16 MB segment.

* Lifecycle helpers
  - cache_dev_start()/stop() encapsulate init/exit and are invoked by
    the dm-pcache core.
  - Errors are handled gracefully: CRC mismatch, wrong endianness,
    device too small (< 512 MB), or failed DAX mapping.

Signed-off-by: Dongsheng Yang <[email protected]>
1 parent 1b65cd5 commit 49affb5
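Before the diff, a quick sanity check of the layout arithmetic: segments start after the metadata blocks, and sb_init() below derives the segment count from whatever remains. A small user-space sketch (the constants mirror cache_dev.h; it assumes PCACHE_META_INDEX_MAX is 2, matching the two cache_info copies in the layout):

/* Sketch of sb_init()'s segment-count arithmetic -- user-space, illustrative. */
#include <stdint.h>
#include <stdio.h>

#define KB		1024ULL
#define MB		(1024 * KB)

/* sb_off + sb + cache_info[0,1] + cache_ctrl = 20 KB */
#define SEGMENTS_OFF	(4 * KB + 4 * KB + 2 * (4 * KB) + 4 * KB)
#define SEG_SIZE	(16 * MB)

int main(void)
{
	uint64_t dev_size = 512 * MB;	/* the minimum accepted device size */
	uint64_t nr_segs = (dev_size - SEGMENTS_OFF) / SEG_SIZE;

	/* (512 MB - 20 KB) / 16 MB = 31 full segments */
	printf("segments: %llu\n", (unsigned long long)nr_segs);
	return 0;
}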

2 files changed

Lines changed: 380 additions & 0 deletions

File tree

drivers/md/dm-pcache/cache_dev.c

Lines changed: 310 additions & 0 deletions
@@ -0,0 +1,310 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/blkdev.h>
#include <linux/dax.h>
#include <linux/vmalloc.h>
#include <linux/pfn_t.h>
#include <linux/parser.h>

#include "cache_dev.h"
#include "backing_dev.h"
#include "cache.h"
#include "dm_pcache.h"

static void cache_dev_dax_exit(struct pcache_cache_dev *cache_dev)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);

	if (cache_dev->use_vmap)
		vunmap(cache_dev->mapping);

	dm_put_device(pcache->ti, cache_dev->dm_dev);
}

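/*
 * Fallback for ranges that dax_direct_access() cannot map contiguously:
 * collect the backing pages chunk by chunk and stitch them into one
 * virtually contiguous mapping with vmap().
 */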
static int build_vmap(struct dax_device *dax_dev, long total_pages, void **vaddr)
{
	struct page **pages;
	long i = 0, chunk;
	pfn_t pfn;
	int ret;

	pages = vmalloc_array(total_pages, sizeof(struct page *));
	if (!pages)
		return -ENOMEM;

	do {
		chunk = dax_direct_access(dax_dev, i, total_pages - i,
					  DAX_ACCESS, NULL, &pfn);
		if (chunk <= 0) {
			ret = chunk ? chunk : -EINVAL;
			goto out_free;
		}

		if (!pfn_t_has_page(pfn)) {
			ret = -EOPNOTSUPP;
			goto out_free;
		}

		while (chunk-- && i < total_pages) {
			pages[i++] = pfn_t_to_page(pfn);
			pfn.val++;
			if (!(i & 15))
				cond_resched();
		}
	} while (i < total_pages);

	*vaddr = vmap(pages, total_pages, VM_MAP, PAGE_KERNEL);
	if (!*vaddr)
		ret = -ENOMEM;
	else
		ret = 0;
out_free:
	vfree(pages);
	return ret;
}

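/*
 * Open the cache device and map it linearly: first try a single direct
 * mapping of the whole range; if dax_direct_access() returns fewer
 * pages than requested, fall back to build_vmap().
 */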
static int cache_dev_dax_init(struct pcache_cache_dev *cache_dev, const char *path)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	struct dax_device *dax_dev;
	long total_pages, mapped_pages;
	u64 bdev_size;
	void *vaddr;
	int ret, id;
	pfn_t pfn;

	ret = dm_get_device(pcache->ti, path,
			    BLK_OPEN_READ | BLK_OPEN_WRITE, &cache_dev->dm_dev);
	if (ret) {
		pcache_dev_err(pcache, "failed to open dm_dev: %s: %d", path, ret);
		goto err;
	}

	dax_dev = cache_dev->dm_dev->dax_dev;

	/* total size check */
	bdev_size = bdev_nr_bytes(cache_dev->dm_dev->bdev);
	if (!bdev_size) {
		ret = -ENODEV;
		pcache_dev_err(pcache, "device %s has zero size\n", path);
		goto put_dm;
	}

	total_pages = bdev_size >> PAGE_SHIFT;
	/* attempt: direct-map the whole range */
	id = dax_read_lock();
	mapped_pages = dax_direct_access(dax_dev, 0, total_pages,
					 DAX_ACCESS, &vaddr, &pfn);
	if (mapped_pages < 0) {
		pcache_dev_err(pcache, "dax_direct_access failed: %ld\n", mapped_pages);
		ret = mapped_pages;
		goto unlock;
	}

	if (!pfn_t_has_page(pfn)) {
		ret = -EOPNOTSUPP;
		goto unlock;
	}

	if (mapped_pages == total_pages) {
		/* success: contiguous direct mapping */
		cache_dev->mapping = vaddr;
	} else {
		/* need vmap fallback */
		ret = build_vmap(dax_dev, total_pages, &vaddr);
		if (ret) {
			pcache_dev_err(pcache, "vmap fallback failed: %d\n", ret);
			goto unlock;
		}

		cache_dev->mapping = vaddr;
		cache_dev->use_vmap = true;
	}
	dax_read_unlock(id);

	return 0;
unlock:
	dax_read_unlock(id);
put_dm:
	dm_put_device(pcache->ti, cache_dev->dm_dev);
err:
	return ret;
}

void cache_dev_zero_range(struct pcache_cache_dev *cache_dev, void *pos, u32 size)
{
	memset(pos, 0, size);
	dax_flush(cache_dev->dm_dev->dax_dev, pos, size);
}

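/*
 * Super-block I/O through the DAX mapping: reads are machine-check safe
 * via copy_mc_to_kernel(); writes use memcpy_flushcache() followed by
 * pmem_wmb() so the SB is durable before the caller proceeds.
 */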
static int sb_read(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct pcache_sb *sb_addr = CACHE_DEV_SB(cache_dev);

	if (copy_mc_to_kernel(sb, sb_addr, sizeof(struct pcache_sb)))
		return -EIO;

	return 0;
}

static void sb_write(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct pcache_sb *sb_addr = CACHE_DEV_SB(cache_dev);

	memcpy_flushcache(sb_addr, sb, sizeof(struct pcache_sb));
	pmem_wmb();
}

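/*
 * Format a blank super-block: record host endianness and the segment
 * count, seal the SB with a CRC32 over every field after crc, and zero
 * the cache_info replicas and the cache_ctrl area.
 */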
static int sb_init(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	u64 nr_segs;
	u64 cache_dev_size;
	u64 magic;
	u32 flags = 0;

	magic = le64_to_cpu(sb->magic);
	if (magic)
		return -EEXIST;

	cache_dev_size = bdev_nr_bytes(file_bdev(cache_dev->dm_dev->bdev_file));
	if (cache_dev_size < PCACHE_CACHE_DEV_SIZE_MIN) {
		pcache_dev_err(pcache, "dax device is too small, requires at least %llu bytes",
			       PCACHE_CACHE_DEV_SIZE_MIN);
		return -ENOSPC;
	}

	nr_segs = (cache_dev_size - PCACHE_SEGMENTS_OFF) / PCACHE_SEG_SIZE;

#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
	flags |= PCACHE_SB_F_BIGENDIAN;
#endif
	sb->flags = cpu_to_le32(flags);
	sb->magic = cpu_to_le64(PCACHE_MAGIC);
	sb->seg_num = cpu_to_le32(nr_segs);
	sb->crc = cpu_to_le32(crc32(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4));

	cache_dev_zero_range(cache_dev, CACHE_DEV_CACHE_INFO(cache_dev),
			     PCACHE_CACHE_INFO_SIZE * PCACHE_META_INDEX_MAX +
			     PCACHE_CACHE_CTRL_SIZE);

	return 0;
}

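/*
 * Reject a super-block with the wrong magic, a mismatching CRC, or an
 * endianness that does not match the running host.
 */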
static int sb_validate(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	u32 flags;
	u32 crc;

	if (le64_to_cpu(sb->magic) != PCACHE_MAGIC) {
		pcache_dev_err(pcache, "unexpected magic: %llx\n",
			       le64_to_cpu(sb->magic));
		return -EINVAL;
	}

	crc = crc32(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4);
	if (crc != le32_to_cpu(sb->crc)) {
		pcache_dev_err(pcache, "corrupted sb: %u, expected: %u\n", crc, le32_to_cpu(sb->crc));
		return -EINVAL;
	}

	flags = le32_to_cpu(sb->flags);
#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
	if (!(flags & PCACHE_SB_F_BIGENDIAN)) {
		pcache_dev_err(pcache, "cache_dev is not big endian\n");
		return -EINVAL;
	}
#else
	if (flags & PCACHE_SB_F_BIGENDIAN) {
		pcache_dev_err(pcache, "cache_dev is big endian\n");
		return -EINVAL;
	}
#endif
	return 0;
}

static int cache_dev_init(struct pcache_cache_dev *cache_dev, u32 seg_num)
{
	cache_dev->seg_num = seg_num;
	cache_dev->seg_bitmap = bitmap_zalloc(cache_dev->seg_num, GFP_KERNEL);
	if (!cache_dev->seg_bitmap)
		return -ENOMEM;

	return 0;
}

static void cache_dev_exit(struct pcache_cache_dev *cache_dev)
{
	bitmap_free(cache_dev->seg_bitmap);
}

void cache_dev_stop(struct dm_pcache *pcache)
{
	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;

	cache_dev_exit(cache_dev);
	cache_dev_dax_exit(cache_dev);
}

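/*
 * Bring the cache device online: map it via DAX, read the super-block,
 * format it if blank, validate it, then allocate the in-memory segment
 * bitmap. A freshly formatted SB is only persisted by sb_write() once
 * everything else has succeeded.
 */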
int cache_dev_start(struct dm_pcache *pcache, const char *cache_dev_path)
{
	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;
	struct pcache_sb sb;
	bool format = false;
	int ret;

	mutex_init(&cache_dev->seg_lock);

	ret = cache_dev_dax_init(cache_dev, cache_dev_path);
	if (ret) {
		pcache_dev_err(pcache, "failed to init cache_dev via DAX: %d", ret);
		goto err;
	}

	ret = sb_read(cache_dev, &sb);
	if (ret)
		goto dax_release;

	if (le64_to_cpu(sb.magic) == 0) {
		format = true;
		ret = sb_init(cache_dev, &sb);
		if (ret < 0)
			goto dax_release;
	}

	ret = sb_validate(cache_dev, &sb);
	if (ret)
		goto dax_release;

	cache_dev->sb_flags = le32_to_cpu(sb.flags);
	ret = cache_dev_init(cache_dev, le32_to_cpu(sb.seg_num));
	if (ret)
		goto dax_release;

	if (format)
		sb_write(cache_dev, &sb);

	return 0;

dax_release:
	cache_dev_dax_exit(cache_dev);
err:
	return ret;
}

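/*
 * Claim the lowest-numbered free segment under seg_lock; returns
 * -ENOSPC once the bitmap is full.
 */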
int cache_dev_get_empty_segment_id(struct pcache_cache_dev *cache_dev, u32 *seg_id)
{
	int ret;

	mutex_lock(&cache_dev->seg_lock);
	*seg_id = find_next_zero_bit(cache_dev->seg_bitmap, cache_dev->seg_num, 0);
	if (*seg_id == cache_dev->seg_num) {
		ret = -ENOSPC;
		goto unlock;
	}

	set_bit(*seg_id, cache_dev->seg_bitmap);
	ret = 0;
unlock:
	mutex_unlock(&cache_dev->seg_lock);
	return ret;
}

drivers/md/dm-pcache/cache_dev.h

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _PCACHE_CACHE_DEV_H
#define _PCACHE_CACHE_DEV_H

#include <linux/device.h>
#include <linux/device-mapper.h>

#include "pcache_internal.h"

#define PCACHE_MAGIC 0x65B05EFA96C596EFULL

#define PCACHE_SB_OFF (4 * PCACHE_KB)
#define PCACHE_SB_SIZE (4 * PCACHE_KB)

#define PCACHE_CACHE_INFO_OFF (PCACHE_SB_OFF + PCACHE_SB_SIZE)
#define PCACHE_CACHE_INFO_SIZE (4 * PCACHE_KB)

#define PCACHE_CACHE_CTRL_OFF (PCACHE_CACHE_INFO_OFF + (PCACHE_CACHE_INFO_SIZE * PCACHE_META_INDEX_MAX))
#define PCACHE_CACHE_CTRL_SIZE (4 * PCACHE_KB)

#define PCACHE_SEGMENTS_OFF (PCACHE_CACHE_CTRL_OFF + PCACHE_CACHE_CTRL_SIZE)
#define PCACHE_SEG_INFO_SIZE (4 * PCACHE_KB)

#define PCACHE_CACHE_DEV_SIZE_MIN (512 * PCACHE_MB)	/* 512 MB */
#define PCACHE_SEG_SIZE (16 * PCACHE_MB)		/* size of each pcache segment (16 MB) */

#define CACHE_DEV_SB(cache_dev) ((struct pcache_sb *)(cache_dev->mapping + PCACHE_SB_OFF))
#define CACHE_DEV_CACHE_INFO(cache_dev) ((void *)cache_dev->mapping + PCACHE_CACHE_INFO_OFF)
#define CACHE_DEV_CACHE_CTRL(cache_dev) ((void *)cache_dev->mapping + PCACHE_CACHE_CTRL_OFF)
#define CACHE_DEV_SEGMENTS(cache_dev) ((void *)cache_dev->mapping + PCACHE_SEGMENTS_OFF)
#define CACHE_DEV_SEGMENT(cache_dev, id) ((void *)CACHE_DEV_SEGMENTS(cache_dev) + (u64)(id) * PCACHE_SEG_SIZE)

/*
 * PCACHE SB flags configured during formatting
 *
 * The PCACHE_SB_F_xxx flags define registration requirements based on
 * cache_dev formatting. For a machine to register a cache_dev:
 * - PCACHE_SB_F_BIGENDIAN: requires a big-endian machine.
 */
#define PCACHE_SB_F_BIGENDIAN BIT(0)

struct pcache_sb {
	__le32 crc;
	__le32 flags;
	__le64 magic;

	__le32 seg_num;
};

struct pcache_cache_dev {
	u32 sb_flags;
	u32 seg_num;
	void *mapping;
	bool use_vmap;

	struct dm_dev *dm_dev;

	struct mutex seg_lock;
	unsigned long *seg_bitmap;
};

struct dm_pcache;
int cache_dev_start(struct dm_pcache *pcache, const char *cache_dev_path);
void cache_dev_stop(struct dm_pcache *pcache);

void cache_dev_zero_range(struct pcache_cache_dev *cache_dev, void *pos, u32 size);

int cache_dev_get_empty_segment_id(struct pcache_cache_dev *cache_dev, u32 *seg_id);

#endif /* _PCACHE_CACHE_DEV_H */
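For orientation, here is a sketch of how a caller in the dm-pcache core might drive this API while constructing a target. The function name, the error-handling shape, and the choice to zero the first claimed segment are illustrative assumptions, not code from this commit:

/* Hypothetical caller -- illustrative only, not part of this commit. */
static int pcache_cache_dev_demo(struct dm_pcache *pcache, const char *dev_path)
{
	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;
	u32 seg_id;
	int ret;

	/* Map the DAX device and load (or format) the super-block. */
	ret = cache_dev_start(pcache, dev_path);
	if (ret)
		return ret;

	/* Claim one free 16 MB segment... */
	ret = cache_dev_get_empty_segment_id(cache_dev, &seg_id);
	if (ret)
		goto stop;

	/* ...and zero it through the persistent mapping. */
	cache_dev_zero_range(cache_dev, CACHE_DEV_SEGMENT(cache_dev, seg_id),
			     PCACHE_SEG_SIZE);

	return 0;

stop:
	cache_dev_stop(pcache);
	return ret;
}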
