|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | + |
| 3 | +#include <linux/blkdev.h> |
| 4 | +#include <linux/dax.h> |
| 5 | +#include <linux/vmalloc.h> |
| 6 | +#include <linux/pfn_t.h> |
| 7 | +#include <linux/parser.h> |
| 8 | + |
| 9 | +#include "cache_dev.h" |
| 10 | +#include "backing_dev.h" |
| 11 | +#include "cache.h" |
| 12 | +#include "dm_pcache.h" |
| 13 | + |
| 14 | +static void cache_dev_dax_exit(struct pcache_cache_dev *cache_dev) |
| 15 | +{ |
| 16 | + struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev); |
| 17 | + |
| 18 | + if (cache_dev->use_vmap) |
| 19 | + vunmap(cache_dev->mapping); |
| 20 | + |
| 21 | + dm_put_device(pcache->ti, cache_dev->dm_dev); |
| 22 | +} |
| 23 | + |
| 24 | +static int build_vmap(struct dax_device *dax_dev, long total_pages, void **vaddr) |
| 25 | +{ |
| 26 | + struct page **pages; |
| 27 | + long i = 0, chunk; |
| 28 | + pfn_t pfn; |
| 29 | + int ret; |
| 30 | + |
| 31 | + pages = vmalloc_array(total_pages, sizeof(struct page *)); |
| 32 | + if (!pages) |
| 33 | + return -ENOMEM; |
| 34 | + |
| 35 | + do { |
| 36 | + chunk = dax_direct_access(dax_dev, i, total_pages - i, |
| 37 | + DAX_ACCESS, NULL, &pfn); |
| 38 | + if (chunk <= 0) { |
| 39 | + ret = chunk ? chunk : -EINVAL; |
| 40 | + goto out_free; |
| 41 | + } |
| 42 | + |
| 43 | + if (!pfn_t_has_page(pfn)) { |
| 44 | + ret = -EOPNOTSUPP; |
| 45 | + goto out_free; |
| 46 | + } |
| 47 | + |
| 48 | + while (chunk-- && i < total_pages) { |
| 49 | + pages[i++] = pfn_t_to_page(pfn); |
| 50 | + pfn.val++; |
| 51 | + if (!(i & 15)) |
| 52 | + cond_resched(); |
| 53 | + } |
| 54 | + } while (i < total_pages); |
| 55 | + |
| 56 | + *vaddr = vmap(pages, total_pages, VM_MAP, PAGE_KERNEL); |
| 57 | + if (!*vaddr) |
| 58 | + ret = -ENOMEM; |
| 59 | +out_free: |
| 60 | + vfree(pages); |
| 61 | + return ret; |
| 62 | +} |
| 63 | + |
/*
 * Open @path as a dm device and establish a kernel virtual mapping of
 * its entire DAX range in cache_dev->mapping.
 *
 * First attempts a single contiguous dax_direct_access() over the whole
 * device; if the device cannot be mapped in one chunk, falls back to a
 * page-by-page vmap() via build_vmap() and sets cache_dev->use_vmap so
 * cache_dev_dax_exit() knows to vunmap() it.
 *
 * dax_read_lock() is held across both the probe and the vmap fallback.
 *
 * Returns 0 on success, a negative errno on failure (the dm device is
 * released on every error path after it was acquired).
 */
static int cache_dev_dax_init(struct pcache_cache_dev *cache_dev, const char *path)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	struct dax_device *dax_dev;
	long total_pages, mapped_pages;
	u64 bdev_size;
	void *vaddr;
	int ret, id;
	pfn_t pfn;

	ret = dm_get_device(pcache->ti, path,
			    BLK_OPEN_READ | BLK_OPEN_WRITE, &cache_dev->dm_dev);
	if (ret) {
		pcache_dev_err(pcache, "failed to open dm_dev: %s: %d", path, ret);
		goto err;
	}

	dax_dev = cache_dev->dm_dev->dax_dev;

	/* total size check */
	bdev_size = bdev_nr_bytes(cache_dev->dm_dev->bdev);
	if (!bdev_size) {
		ret = -ENODEV;
		pcache_dev_err(pcache, "device %s has zero size\n", path);
		goto put_dm;
	}

	total_pages = bdev_size >> PAGE_SHIFT;
	/* attempt: direct-map the whole range */
	id = dax_read_lock();
	mapped_pages = dax_direct_access(dax_dev, 0, total_pages,
					 DAX_ACCESS, &vaddr, &pfn);
	if (mapped_pages < 0) {
		pcache_dev_err(pcache, "dax_direct_access failed: %ld\n", mapped_pages);
		ret = mapped_pages;
		goto unlock;
	}

	/* a PFN without a struct page cannot be used for the vmap fallback either */
	if (!pfn_t_has_page(pfn)) {
		ret = -EOPNOTSUPP;
		goto unlock;
	}

	if (mapped_pages == total_pages) {
		/* success: contiguous direct mapping */
		cache_dev->mapping = vaddr;
	} else {
		/* need vmap fallback */
		ret = build_vmap(dax_dev, total_pages, &vaddr);
		if (ret) {
			pcache_dev_err(pcache, "vmap fallback failed: %d\n", ret);
			goto unlock;
		}

		cache_dev->mapping = vaddr;
		cache_dev->use_vmap = true;
	}
	dax_read_unlock(id);

	return 0;
unlock:
	dax_read_unlock(id);
put_dm:
	dm_put_device(pcache->ti, cache_dev->dm_dev);
err:
	return ret;
}
| 131 | + |
| 132 | +void cache_dev_zero_range(struct pcache_cache_dev *cache_dev, void *pos, u32 size) |
| 133 | +{ |
| 134 | + memset(pos, 0, size); |
| 135 | + dax_flush(cache_dev->dm_dev->dax_dev, pos, size); |
| 136 | +} |
| 137 | + |
| 138 | +static int sb_read(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb) |
| 139 | +{ |
| 140 | + struct pcache_sb *sb_addr = CACHE_DEV_SB(cache_dev); |
| 141 | + |
| 142 | + if (copy_mc_to_kernel(sb, sb_addr, sizeof(struct pcache_sb))) |
| 143 | + return -EIO; |
| 144 | + |
| 145 | + return 0; |
| 146 | +} |
| 147 | + |
| 148 | +static void sb_write(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb) |
| 149 | +{ |
| 150 | + struct pcache_sb *sb_addr = CACHE_DEV_SB(cache_dev); |
| 151 | + |
| 152 | + memcpy_flushcache(sb_addr, sb, sizeof(struct pcache_sb)); |
| 153 | + pmem_wmb(); |
| 154 | +} |
| 155 | + |
/*
 * Format a fresh superblock in @sb for a blank device.
 *
 * Returns -EEXIST if @sb already carries a non-zero magic (device was
 * formatted before), -ENOSPC if the device is smaller than
 * PCACHE_CACHE_DEV_SIZE_MIN, 0 on success.
 *
 * Note: only the cache_info/cache_ctrl region is zeroed on media here;
 * the superblock itself is committed later by the caller (see
 * cache_dev_start(), which calls sb_write() after everything else
 * succeeded).
 */
static int sb_init(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	u64 nr_segs;
	u64 cache_dev_size;
	u64 magic;
	u32 flags = 0;

	magic = le64_to_cpu(sb->magic);
	if (magic)
		return -EEXIST;

	cache_dev_size = bdev_nr_bytes(file_bdev(cache_dev->dm_dev->bdev_file));
	if (cache_dev_size < PCACHE_CACHE_DEV_SIZE_MIN) {
		pcache_dev_err(pcache, "dax device is too small, required at least %llu",
			       PCACHE_CACHE_DEV_SIZE_MIN);
		return -ENOSPC;
	}

	/* whole segments that fit after the fixed metadata area */
	nr_segs = (cache_dev_size - PCACHE_SEGMENTS_OFF) / ((PCACHE_SEG_SIZE));

	/* record host endianness so a mismatched reader can refuse the device */
#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
	flags |= PCACHE_SB_F_BIGENDIAN;
#endif
	sb->flags = cpu_to_le32(flags);
	sb->magic = cpu_to_le64(PCACHE_MAGIC);
	sb->seg_num = cpu_to_le32(nr_segs);
	/* CRC over everything past the first 4 bytes — assumes crc is the
	 * leading 32-bit field of struct pcache_sb (TODO confirm layout) */
	sb->crc = cpu_to_le32(crc32(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4));

	cache_dev_zero_range(cache_dev, CACHE_DEV_CACHE_INFO(cache_dev),
			     PCACHE_CACHE_INFO_SIZE * PCACHE_META_INDEX_MAX +
			     PCACHE_CACHE_CTRL_SIZE);

	return 0;
}
| 191 | + |
/*
 * Validate a superblock read from media: magic, CRC, and endianness.
 *
 * The endianness flag written at format time (see sb_init()) must match
 * the running host's byte order, otherwise the on-media metadata cannot
 * be interpreted and the device is rejected.
 *
 * Returns 0 when valid, -EINVAL otherwise.
 */
static int sb_validate(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	u32 flags;
	u32 crc;

	if (le64_to_cpu(sb->magic) != PCACHE_MAGIC) {
		pcache_dev_err(pcache, "unexpected magic: %llx\n",
			       le64_to_cpu(sb->magic));
		return -EINVAL;
	}

	/* same coverage as sb_init(): everything past the 4-byte crc field */
	crc = crc32(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4);
	if (crc != le32_to_cpu(sb->crc)) {
		pcache_dev_err(pcache, "corrupted sb: %u, expected: %u\n", crc, le32_to_cpu(sb->crc));
		return -EINVAL;
	}

	flags = le32_to_cpu(sb->flags);
#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
	if (!(flags & PCACHE_SB_F_BIGENDIAN)) {
		pcache_dev_err(pcache, "cache_dev is not big endian\n");
		return -EINVAL;
	}
#else
	if (flags & PCACHE_SB_F_BIGENDIAN) {
		pcache_dev_err(pcache, "cache_dev is big endian\n");
		return -EINVAL;
	}
#endif
	return 0;
}
| 224 | + |
| 225 | +static int cache_dev_init(struct pcache_cache_dev *cache_dev, u32 seg_num) |
| 226 | +{ |
| 227 | + cache_dev->seg_num = seg_num; |
| 228 | + cache_dev->seg_bitmap = bitmap_zalloc(cache_dev->seg_num, GFP_KERNEL); |
| 229 | + if (!cache_dev->seg_bitmap) |
| 230 | + return -ENOMEM; |
| 231 | + |
| 232 | + return 0; |
| 233 | +} |
| 234 | + |
/* Counterpart of cache_dev_init(): free the segment allocation bitmap. */
static void cache_dev_exit(struct pcache_cache_dev *cache_dev)
{
	bitmap_free(cache_dev->seg_bitmap);
}
| 239 | + |
| 240 | +void cache_dev_stop(struct dm_pcache *pcache) |
| 241 | +{ |
| 242 | + struct pcache_cache_dev *cache_dev = &pcache->cache_dev; |
| 243 | + |
| 244 | + cache_dev_exit(cache_dev); |
| 245 | + cache_dev_dax_exit(cache_dev); |
| 246 | +} |
| 247 | + |
| 248 | +int cache_dev_start(struct dm_pcache *pcache, const char *cache_dev_path) |
| 249 | +{ |
| 250 | + struct pcache_cache_dev *cache_dev = &pcache->cache_dev; |
| 251 | + struct pcache_sb sb; |
| 252 | + bool format = false; |
| 253 | + int ret; |
| 254 | + |
| 255 | + mutex_init(&cache_dev->seg_lock); |
| 256 | + |
| 257 | + ret = cache_dev_dax_init(cache_dev, cache_dev_path); |
| 258 | + if (ret) { |
| 259 | + pcache_dev_err(pcache, "failed to init cache_dev via dax way: %d.", ret); |
| 260 | + goto err; |
| 261 | + } |
| 262 | + |
| 263 | + ret = sb_read(cache_dev, &sb); |
| 264 | + if (ret) |
| 265 | + goto dax_release; |
| 266 | + |
| 267 | + if (le64_to_cpu(sb.magic) == 0) { |
| 268 | + format = true; |
| 269 | + ret = sb_init(cache_dev, &sb); |
| 270 | + if (ret < 0) |
| 271 | + goto dax_release; |
| 272 | + } |
| 273 | + |
| 274 | + ret = sb_validate(cache_dev, &sb); |
| 275 | + if (ret) |
| 276 | + goto dax_release; |
| 277 | + |
| 278 | + cache_dev->sb_flags = le32_to_cpu(sb.flags); |
| 279 | + ret = cache_dev_init(cache_dev, sb.seg_num); |
| 280 | + if (ret) |
| 281 | + goto dax_release; |
| 282 | + |
| 283 | + if (format) |
| 284 | + sb_write(cache_dev, &sb); |
| 285 | + |
| 286 | + return 0; |
| 287 | + |
| 288 | +dax_release: |
| 289 | + cache_dev_dax_exit(cache_dev); |
| 290 | +err: |
| 291 | + return ret; |
| 292 | +} |
| 293 | + |
| 294 | +int cache_dev_get_empty_segment_id(struct pcache_cache_dev *cache_dev, u32 *seg_id) |
| 295 | +{ |
| 296 | + int ret; |
| 297 | + |
| 298 | + mutex_lock(&cache_dev->seg_lock); |
| 299 | + *seg_id = find_next_zero_bit(cache_dev->seg_bitmap, cache_dev->seg_num, 0); |
| 300 | + if (*seg_id == cache_dev->seg_num) { |
| 301 | + ret = -ENOSPC; |
| 302 | + goto unlock; |
| 303 | + } |
| 304 | + |
| 305 | + set_bit(*seg_id, cache_dev->seg_bitmap); |
| 306 | + ret = 0; |
| 307 | +unlock: |
| 308 | + mutex_unlock(&cache_dev->seg_lock); |
| 309 | + return ret; |
| 310 | +} |
0 commit comments