// SPDX-License-Identifier: GPL-2.0
/*
 * Offloaded and onloaded data copying support.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-copy.h>
#include <linux/blk-mq.h>
#include <linux/export.h>
#include <linux/slab.h>

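/*
 * A copy operation proceeds in two phases:
 * - BLKDEV_TRANSLATE_LBAS: blkdev_copy_chunk() submits one REQ_OP_COPY_SRC bio
 *   per input segment fragment and one REQ_OP_COPY_DST bio per output segment
 *   fragment. Lower layers are expected to translate the LBAs of these bios
 *   and to add them to the @bios list of the copy offload context instead of
 *   completing them, dropping one @bio_count reference per translated bio.
 * - BLKDEV_COPY: once all bios have been translated, the first bio on the list
 *   is resubmitted to perform the actual copy operation. Its completion ends
 *   the remaining bios and, when the last copy context completes, invokes the
 *   caller's end_io() callback.
 */
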
/* End all bios in the @ctx->bios list with status @ctx->status. */
static void blkdev_end_bios(struct bio_copy_offload_ctx *ctx)
{
	struct bio *bio, *next;

	bio = ctx->bios;
	ctx->bios = NULL;
	for (; bio; bio = next) {
		next = bio->bi_next;
		bio->bi_status = ctx->status;
		bio_endio(bio);
	}
}

/*
 * Called after LBA translation has finished for all bios associated with the
 * copy context @ctx.
 */
static void blkdev_translation_complete(struct bio_copy_offload_ctx *ctx)
{
	struct module *owner = NULL;
	struct bio *bio;

	WARN_ON_ONCE(ctx->phase != BLKDEV_TRANSLATE_LBAS);
	ctx->phase = BLKDEV_COPY;

	/* Check whether all bios are associated with the same block driver. */
	for (bio = ctx->bios; bio; bio = bio->bi_next) {
		if (!owner) {
			owner = bio->bi_bdev->bd_disk->fops->owner;
		} else if (owner != bio->bi_bdev->bd_disk->fops->owner) {
			ctx->status = BLK_STS_INVAL;
			break;
		}
	}

	/* Remove the first bio from the bio list and submit it. */
	bio = ctx->bios;
	ctx->bios = bio->bi_next;
	bio->bi_next = NULL;
	if (ctx->biotail == bio)
		ctx->biotail = NULL;
	if (ctx->status == BLK_STS_OK)
		submit_bio(bio);
	else
		bio_endio(bio);
}

/* REQ_OP_COPY_* completion handler. */
static void blkdev_req_op_copy_done(struct bio *bio)
{
	struct bio_copy_offload_ctx *ctx = bio->bi_copy_ctx;
	struct blk_copy_params *params = ctx->params;
	blk_status_t status;

	switch (ctx->phase) {
	case BLKDEV_TRANSLATE_LBAS:
		scoped_guard(spinlock_irqsave, &ctx->lock)
			if (!ctx->status)
				ctx->status = bio->bi_status;
		break;
	case BLKDEV_COPY:
		/* Also record the status of the copy operation itself. */
		if (!ctx->status)
			ctx->status = bio->bi_status;
		status = ctx->status;
		ctx->phase = BLKDEV_COPY_DONE;
		blkdev_end_bios(ctx);
		kfree(ctx);
		scoped_guard(spinlock_irqsave, &params->lock) {
			if (!params->status)
				params->status = status;
		}
		if (atomic_dec_and_test(&params->copy_ctx_count))
			params->end_io(params);
		break;
	case BLKDEV_COPY_DONE:
		break;
	}
}

/*
 * Check that all LBA offsets are aligned with both the source and the
 * destination logical block sizes. Verify that the total input length matches
 * the total output length. Store the number of bytes to be transferred in
 * *@len.
 */
static int blkdev_copy_check_params(const struct blk_copy_params *params,
				    loff_t *len)
{
	const unsigned int mask =
		max(bdev_logical_block_size(params->in_bdev),
		    bdev_logical_block_size(params->out_bdev)) - 1;
	loff_t in_len = 0, out_len = 0;
	unsigned int i;

	for (i = 0; i < params->in_nseg; i++) {
		if ((params->in_segs[i].pos | params->in_segs[i].len) & mask)
			return -EINVAL;
		in_len += params->in_segs[i].len;
	}

	for (i = 0; i < params->out_nseg; i++) {
		if ((params->out_segs[i].pos | params->out_segs[i].len) & mask)
			return -EINVAL;
		out_len += params->out_segs[i].len;
	}

	if (in_len != out_len)
		return -EINVAL;

	*len = in_len;

	return 0;
}

/*
 * Calculate the number of bytes in up to max_copy_src_segments input segments
 * starting from input segment @in_idx.
 */
static loff_t blk_max_src_len(const struct blk_copy_params *params,
			      unsigned int in_idx)
{
	u16 max_src_segments =
		params->in_bdev->bd_queue->limits.max_copy_src_segments;
	unsigned int max_i = min(params->in_nseg, in_idx + max_src_segments);
	loff_t len = 0;

	for (u32 i = in_idx; i < max_i; i++)
		len += params->in_segs[i].len;

	return len;
}

/*
 * Calculate the number of bytes in up to max_copy_dst_segments output segments
 * starting from output segment @out_idx.
 */
static loff_t blk_max_dst_len(const struct blk_copy_params *params,
			      unsigned int out_idx)
{
	u16 max_dst_segments =
		params->out_bdev->bd_queue->limits.max_copy_dst_segments;
	unsigned int max_i = min(params->out_nseg, out_idx + max_dst_segments);
	loff_t len = 0;

	for (u32 i = out_idx; i < max_i; i++)
		len += params->out_segs[i].len;

	return len;
}

struct blkdev_copy_sync_ctx {
	struct completion compl;
	blk_status_t status;
};

static void blkdev_end_copy_sync(const struct blk_copy_params *params)
{
	struct blkdev_copy_sync_ctx *ctx = params->private;

	ctx->status = params->status;
	complete(&ctx->compl);
}

static int blkdev_copy_sync(struct blk_copy_params *params)
{
	struct blkdev_copy_sync_ctx ctx = {
		.compl = COMPLETION_INITIALIZER_ONSTACK(ctx.compl),
	};
	int ret;

	WARN_ON_ONCE(params->end_io || params->private);
	params->end_io = blkdev_end_copy_sync;
	params->private = &ctx;

	ret = blkdev_copy_offload(params);
	if (ret && ret != -EIOCBQUEUED)
		return ret;

	wait_for_completion(&ctx.compl);
	return blk_status_to_errno(ctx.status);
}

/**
 * blkdev_copy_chunk() - submit a single copy offload operation
 * @params: Copy offload input parameters.
 * @in_idx: Index of the input segment from where to start copying.
 * @out_idx: Index of the output segment to where to start copying.
 * @in_offset: Offset in bytes from the start of input segment @in_idx.
 * @out_offset: Offset in bytes from the start of output segment @out_idx.
 * @chunk: Maximum number of bytes to copy.
 *
 * Return: the number of bytes covered by the submitted copy operation or a
 *	negative error number.
 */
static loff_t blkdev_copy_chunk(struct blk_copy_params *params, u32 *in_idx,
				u32 *out_idx, loff_t *in_offset,
				loff_t *out_offset, loff_t chunk)
{
	struct bio_copy_offload_ctx *ctx;
	u32 bio_count;

	ctx = kzalloc(sizeof(*ctx), GFP_NOIO);
	if (!ctx)
		return -ENOMEM;

	spin_lock_init(&ctx->lock);
	ctx->params = params;
	ctx->phase = BLKDEV_TRANSLATE_LBAS;
	ctx->translation_complete = blkdev_translation_complete;
	/*
	 * Initialized to one to prevent ctx->translation_complete() from being
	 * called before bio submission has finished.
	 */
	ctx->bio_count = 1;
	/* Each copy context holds one reference on @params->copy_ctx_count. */
	atomic_inc(&params->copy_ctx_count);

	WARN_ON_ONCE(chunk <= 0);
	chunk = min(chunk, blk_max_src_len(params, *in_idx) - *in_offset);
	WARN_ON_ONCE(chunk <= 0);
	chunk = min(chunk, blk_max_dst_len(params, *out_idx) - *out_offset);
	WARN_ON_ONCE(chunk <= 0);
	ctx->len = chunk;
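	/*
	 * Submit one REQ_OP_COPY_SRC bio per input segment fragment that
	 * contributes to this chunk.
	 */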
	for (loff_t bytes, remaining_in = chunk; remaining_in > 0;
	     remaining_in -= bytes) {
		struct bio *src_bio;

		src_bio = bio_alloc(params->in_bdev, 0, REQ_OP_COPY_SRC,
				    GFP_NOIO);
		if (!src_bio) {
			if (remaining_in == chunk)
				goto free_ctx;
			else
				goto enomem;
		}
		scoped_guard(spinlock_irqsave, &ctx->lock)
			ctx->bio_count++;
		bytes = min(remaining_in, params->in_segs[*in_idx].len -
			    *in_offset);
		src_bio->bi_iter.bi_size = bytes;
		src_bio->bi_iter.bi_sector = (params->in_segs[*in_idx].pos +
					      *in_offset) >> SECTOR_SHIFT;
		src_bio->bi_copy_ctx = ctx;
		src_bio->bi_end_io = blkdev_req_op_copy_done;
		*in_offset += bytes;
		if (*in_offset >= params->in_segs[*in_idx].len) {
			*in_offset -= params->in_segs[*in_idx].len;
			(*in_idx)++;
		}
		submit_bio(src_bio);
	}
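	/*
	 * Submit one REQ_OP_COPY_DST bio per output segment fragment that
	 * receives data from this chunk.
	 */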
	for (loff_t bytes, remaining_out = chunk; remaining_out;
	     remaining_out -= bytes) {
		struct bio *dst_bio;

		dst_bio = bio_alloc(params->out_bdev, 0, REQ_OP_COPY_DST,
				    GFP_NOIO);
		if (!dst_bio)
			goto enomem;
		scoped_guard(spinlock_irqsave, &ctx->lock)
			ctx->bio_count++;
		bytes = min(remaining_out, params->out_segs[*out_idx].len -
			    *out_offset);
		dst_bio->bi_iter.bi_size = bytes;
		dst_bio->bi_iter.bi_sector = (params->out_segs[*out_idx].pos +
					      *out_offset) >> SECTOR_SHIFT;
		dst_bio->bi_copy_ctx = ctx;
		dst_bio->bi_end_io = blkdev_req_op_copy_done;
		*out_offset += bytes;
		if (*out_offset >= params->out_segs[*out_idx].len) {
			*out_offset -= params->out_segs[*out_idx].len;
			(*out_idx)++;
		}
		submit_bio(dst_bio);
	}

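	/*
	 * Drop the ctx->bio_count reference that was taken before bio
	 * submission started.
	 */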
dec_bio_count:
	scoped_guard(spinlock_irqsave, &ctx->lock)
		bio_count = --ctx->bio_count;
	if (bio_count == 0)
		ctx->translation_complete(ctx);
	return chunk;

enomem:
	scoped_guard(spinlock_irqsave, &ctx->lock)
		if (!ctx->status)
			ctx->status = BLK_STS_RESOURCE;
	chunk = -ENOMEM;
	goto dec_bio_count;

free_ctx:
	atomic_dec(&params->copy_ctx_count);
	kfree(ctx);
	return -ENOMEM;
}

/**
 * blkdev_copy_offload() - copy data and offload copying if possible.
 * @params: Source and destination block device, data ranges and completion
 *	callback.
 *
 * If @params->end_io != NULL, data is copied asynchronously. If
 * @params->end_io == NULL, this function only returns after data copying has
 * finished.
 *
 * Return: 0 upon success; -EIOCBQUEUED if the completion callback function
 *	will be called or has already been called; -EINVAL if the data ranges
 *	are not aligned to the logical block size or if the input and output
 *	lengths differ; -EOPNOTSUPP if copy offloading is not supported by the
 *	block device or if the source or destination address ranges span more
 *	than one dm device.
 */
int blkdev_copy_offload(struct blk_copy_params *params)
{
	loff_t in_offset = 0, out_offset = 0;
	u32 in_idx = 0, out_idx = 0;
	loff_t len, chunk, max_chunk;
	int ret;

	might_sleep();

	if (!params->end_io)
		return blkdev_copy_sync(params);

	spin_lock_init(&params->lock);

	if (!bdev_max_copy_sectors(params->in_bdev) ||
	    !bdev_max_copy_sectors(params->out_bdev))
		return -EOPNOTSUPP;

	ret = blkdev_copy_check_params(params, &len);
	if (ret)
		return ret;

	params->len = len;

	max_chunk = (u64)min(bdev_max_copy_sectors(params->in_bdev),
			     bdev_max_copy_sectors(params->out_bdev))
		    << SECTOR_SHIFT;

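	/*
	 * Hold an extra reference on copy_ctx_count so that end_io() is not
	 * called before all chunks have been submitted.
	 */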
	atomic_set(&params->copy_ctx_count, 1);

	for (loff_t offset = 0; offset < len; offset += chunk) {
		chunk = min(len - offset, max_chunk);
		chunk = blkdev_copy_chunk(params, &in_idx, &out_idx, &in_offset,
					  &out_offset, chunk);
		if (chunk < 0) {
			scoped_guard(spinlock_irqsave, &params->lock)
				if (!params->status)
					params->status =
						errno_to_blk_status(chunk);
			break;
		}
	}

	if (atomic_dec_and_test(&params->copy_ctx_count))
		params->end_io(params);

	return -EIOCBQUEUED;
}
EXPORT_SYMBOL_GPL(blkdev_copy_offload);
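
/*
 * Example (sketch) of a synchronous call: copy 64 KiB from offset 0 of
 * @src_bdev to offset 1 MiB of @dst_bdev. Leaving .end_io NULL makes
 * blkdev_copy_offload() wait for the copy to complete. "struct blk_copy_seg"
 * is a placeholder name for the element type of the in_segs[] and out_segs[]
 * arrays declared in <linux/blk-copy.h>; only its .pos and .len members are
 * used, as in the code above.
 *
 *	struct blk_copy_seg in_seg = { .pos = 0, .len = SZ_64K };
 *	struct blk_copy_seg out_seg = { .pos = SZ_1M, .len = SZ_64K };
 *	struct blk_copy_params params = {
 *		.in_bdev = src_bdev,
 *		.out_bdev = dst_bdev,
 *		.in_segs = &in_seg,
 *		.in_nseg = 1,
 *		.out_segs = &out_seg,
 *		.out_nseg = 1,
 *	};
 *	int ret = blkdev_copy_offload(&params);
 */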