Skip to content

Commit 0bd3285

Browse files
bvanasschekawasaki
authored and committed
block: Introduce blkdev_copy_offload()
Introduce blkdev_copy_offload() for performing copy offloading. This function implements the algorithm explained in the description of the previous patch. If the input parameters exceed what can be supported with a single copy offload operation, multiple copy offload operations are submitted. Signed-off-by: Bart Van Assche <[email protected]>
1 parent 6be6e4d commit 0bd3285

4 files changed

Lines changed: 397 additions & 1 deletion

File tree

block/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
77
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
88
blk-merge.o blk-timeout.o blk-lib.o blk-mq.o \
9-
blk-mq-tag.o blk-mq-dma.o blk-stat.o \
9+
blk-mq-tag.o blk-mq-dma.o blk-stat.o blk-copy.o \
1010
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
1111
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
1212
disk-events.o blk-ia-ranges.o early-lookup.o

block/blk-copy.c

Lines changed: 355 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Offloaded and onloaded data copying support.
4+
*/
5+
#include <linux/bio.h>
6+
#include <linux/blkdev.h>
7+
#include <linux/blk-copy.h>
8+
#include <linux/blk-mq.h>
9+
10+
/* End all bios in the @ctx->bios list with status @ctx->status. */
11+
static void blkdev_end_bios(struct bio_copy_offload_ctx *ctx)
12+
{
13+
struct bio *bio, *next;
14+
15+
bio = ctx->bios;
16+
ctx->bios = NULL;
17+
for (; bio; bio = next) {
18+
next = bio->bi_next;
19+
bio->bi_status = ctx->status;
20+
bio_endio(bio);
21+
}
22+
}
23+
24+
/*
 * Called after LBA translation finished for all bios associated with copy context
 * @ctx.
 *
 * Verifies that every translated bio targets a driver owned by the same
 * module, then removes and submits the first bio of the copy operation.
 * Assumes @ctx->bios is non-empty — the first bio is dereferenced without a
 * NULL check. NOTE(review): LBA translation itself is presumably performed by
 * stacking drivers before this callback runs; that code is not visible here.
 */
static void blkdev_translation_complete(struct bio_copy_offload_ctx *ctx)
{
	struct module *owner = NULL;
	struct bio *bio;

	/* Translation must complete exactly once; advance to the copy phase. */
	WARN_ON_ONCE(ctx->phase != BLKDEV_TRANSLATE_LBAS);
	ctx->phase = BLKDEV_COPY;

	/* Check whether all bios are associated with the same block driver. */
	for (bio = ctx->bios; bio; bio = bio->bi_next) {
		if (!owner) {
			owner = bio->bi_bdev->bd_disk->fops->owner;
		} else if (owner != bio->bi_bdev->bd_disk->fops->owner) {
			/* Bios ended up on different drivers: reject the copy. */
			ctx->status = BLK_STS_INVAL;
			break;
		}
	}

	/* Remove the first bio from the bio list and submit it. */
	bio = ctx->bios;
	ctx->bios = bio->bi_next;
	bio->bi_next = NULL;
	if (ctx->biotail == bio)
		ctx->biotail = NULL;
	/* On failure, complete the bio immediately instead of submitting it. */
	if (ctx->status == BLK_STS_OK)
		submit_bio(bio);
	else
		bio_endio(bio);
}
57+
58+
/*
 * REQ_OP_COPY_* completion handler. Behavior depends on the phase of the
 * per-chunk context the completing bio belongs to.
 */
static void blkdev_req_op_copy_done(struct bio *bio)
{
	struct bio_copy_offload_ctx *ctx = bio->bi_copy_ctx;
	struct blk_copy_params *params = ctx->params;
	blk_status_t status;

	switch (ctx->phase) {
	case BLKDEV_TRANSLATE_LBAS:
		/* Record only the first error seen during translation. */
		scoped_guard(spinlock_irqsave, &ctx->lock)
			if (!ctx->status)
				ctx->status = bio->bi_status;
		break;
	case BLKDEV_COPY:
		/*
		 * The copy operation finished: end the remaining bios with the
		 * chunk's status, free the per-chunk context, propagate the
		 * first error into @params and, when this was the last
		 * outstanding chunk, invoke the caller's completion callback.
		 *
		 * NOTE(review): bio->bi_status of the completing bio itself is
		 * not merged into @status here — confirm that is intentional.
		 * NOTE(review): no bio_put() in this handler; presumably the
		 * bios are released elsewhere — verify against the submitter.
		 */
		status = ctx->status;
		ctx->phase = BLKDEV_COPY_DONE;
		blkdev_end_bios(ctx);
		kfree(ctx); /* @ctx must not be touched past this point. */
		scoped_guard(spinlock_irqsave, &params->lock) {
			if (!params->status)
				params->status = status;
		}
		if (atomic_dec_and_test(&params->copy_ctx_count))
			params->end_io(params);
		break;
	case BLKDEV_COPY_DONE:
		/* Late completion after the copy already finished: ignore. */
		break;
	}
}
87+
88+
/*
89+
* Check that all LBA offsets are aligned with both the source and the destination
90+
* logical block sizes. Compare input and output length. Store the number of bytes
91+
* to be transferred in *@len.
92+
*/
93+
static int blkdev_copy_check_params(const struct blk_copy_params *params,
94+
loff_t *len)
95+
{
96+
const unsigned int mask =
97+
max(bdev_logical_block_size(params->in_bdev),
98+
bdev_logical_block_size(params->out_bdev)) - 1;
99+
loff_t in_len = 0, out_len = 0;
100+
unsigned int i;
101+
102+
for (i = 0; i < params->in_nseg; i++) {
103+
if ((params->in_segs[i].pos | params->in_segs[i].len) & mask)
104+
return -EINVAL;
105+
in_len += params->in_segs[i].len;
106+
}
107+
108+
for (i = 0; i < params->out_nseg; i++) {
109+
if ((params->out_segs[i].pos | params->out_segs[i].len) & mask)
110+
return -EINVAL;
111+
out_len += params->out_segs[i].len;
112+
}
113+
114+
if (in_len != out_len)
115+
return -EINVAL;
116+
117+
*len = in_len;
118+
119+
return 0;
120+
}
121+
122+
/*
123+
* Calculate the number of bytes in the max_copy_src_segments input segments
124+
* starting from input segment @in_idx.
125+
*/
126+
static loff_t blk_max_src_len(const struct blk_copy_params *params,
127+
unsigned int in_idx)
128+
{
129+
uint16_t max_src_segments =
130+
params->in_bdev->bd_queue->limits.max_copy_src_segments;
131+
unsigned int max_i = min(params->in_nseg, in_idx + max_src_segments);
132+
loff_t len = 0;
133+
134+
for (uint32_t i = in_idx; i < max_i; i++)
135+
len += params->in_segs[i].len;
136+
137+
return len;
138+
}
139+
140+
/*
141+
* Calculate the number of bytes in the max_copy_dst_segments output segments
142+
* starting from output segment @out_idx.
143+
*/
144+
static loff_t blk_max_dst_len(const struct blk_copy_params *params,
145+
unsigned int out_idx)
146+
{
147+
uint16_t max_dst_segments =
148+
params->out_bdev->bd_queue->limits.max_copy_dst_segments;
149+
unsigned int max_i = min(params->out_nseg, out_idx + max_dst_segments);
150+
loff_t len = 0;
151+
152+
for (uint32_t i = out_idx; i < max_i; i++)
153+
len += params->out_segs[i].len;
154+
155+
return len;
156+
}
157+
158+
/* State shared between blkdev_copy_sync() and its completion callback. */
struct blkdev_copy_sync_ctx {
	/* Signaled by the completion callback when the copy has finished. */
	struct completion compl;
	/* Copy status read by the waiter after @compl fires. */
	blk_status_t status;
};
162+
163+
static void blkdev_end_copy_sync(const struct blk_copy_params *params)
164+
{
165+
struct blkdev_copy_sync_ctx *ctx = params->private;
166+
167+
complete(&ctx->compl);
168+
}
169+
170+
static int blkdev_copy_sync(struct blk_copy_params *params)
171+
{
172+
struct blkdev_copy_sync_ctx ctx = {
173+
.compl = COMPLETION_INITIALIZER_ONSTACK(ctx.compl),
174+
};
175+
int ret;
176+
177+
WARN_ON_ONCE(params->end_io || params->private);
178+
params->end_io = blkdev_end_copy_sync;
179+
params->private = &ctx;
180+
181+
ret = blkdev_copy_offload(params);
182+
if (ret && ret != -EIOCBQUEUED)
183+
return ret;
184+
185+
wait_for_completion(&ctx.compl);
186+
return blk_status_to_errno(ctx.status);
187+
}
188+
189+
/**
 * blkdev_copy_chunk() - submit a single copy offload operation
 * @params: Copy offload input parameters.
 * @in_idx: Index of the input segment from where to start copying.
 * @out_idx: Index of the output segment to where to start copying.
 * @in_offset: Offset in bytes from the start of input segment @in_idx.
 * @out_offset: Offset in bytes from the start of output segment @out_idx.
 * @chunk: Maximum number of bytes to copy.
 *
 * Allocates a per-chunk context, then submits one REQ_OP_COPY_SRC bio per
 * covered input segment followed by one REQ_OP_COPY_DST bio per covered
 * output segment. @in_idx/@in_offset and @out_idx/@out_offset are advanced
 * past the consumed bytes so the caller can submit the next chunk.
 *
 * Returns: the number of bytes covered by the submitted copy operation or a
 * negative error number.
 */
static loff_t blkdev_copy_chunk(struct blk_copy_params *params, u32 *in_idx,
				u32 *out_idx, loff_t *in_offset,
				loff_t *out_offset, loff_t chunk)
{
	struct bio_copy_offload_ctx *ctx;
	u32 bio_count;

	ctx = kzalloc_obj(*ctx);
	if (!ctx)
		return -ENOMEM;

	spin_lock_init(&ctx->lock);
	ctx->params = params;
	ctx->phase = BLKDEV_TRANSLATE_LBAS;
	ctx->translation_complete = blkdev_translation_complete;
	/*
	 * Initialized to one to prevent that ctx->translation_complete() is
	 * called before bio submission has finished.
	 */
	ctx->bio_count = 1;

	/*
	 * Clamp @chunk so it fits in the segment windows of both the source
	 * and the destination, starting at the current offsets.
	 */
	WARN_ON_ONCE(chunk <= 0);
	chunk = min(chunk, blk_max_src_len(params, *in_idx) - *in_offset);
	WARN_ON_ONCE(chunk <= 0);
	chunk = min(chunk, blk_max_dst_len(params, *out_idx) - *out_offset);
	WARN_ON_ONCE(chunk <= 0);
	ctx->len = chunk;
	/* Submit one REQ_OP_COPY_SRC bio per input segment touched by @chunk. */
	for (loff_t bytes, remaining_in = chunk; remaining_in > 0;
	     remaining_in -= bytes) {
		struct bio *src_bio;

		src_bio = bio_alloc(params->in_bdev, 0, REQ_OP_COPY_SRC,
				    GFP_NOIO);
		if (!src_bio) {
			/*
			 * Nothing submitted yet: the context can be freed
			 * directly. Otherwise completion must go through the
			 * regular bio accounting below.
			 */
			if (remaining_in == chunk)
				goto free_ctx;
			else
				goto enomem;
		}
		/*
		 * NOTE(review): @params->copy_ctx_count is incremented once
		 * per source bio — verify this matches the decrements in
		 * blkdev_req_op_copy_done(), which decrements per completion
		 * in the BLKDEV_COPY phase.
		 */
		atomic_inc(&params->copy_ctx_count);
		scoped_guard(spinlock_irqsave, &ctx->lock)
			ctx->bio_count++;
		/* Cap this bio at the end of the current input segment. */
		bytes = min(remaining_in, params->in_segs[*in_idx].len -
			    *in_offset);
		src_bio->bi_iter.bi_size = bytes;
		src_bio->bi_iter.bi_sector = (params->in_segs[*in_idx].pos +
					      *in_offset) >> SECTOR_SHIFT;
		src_bio->bi_copy_ctx = ctx;
		src_bio->bi_end_io = blkdev_req_op_copy_done;
		/* Advance to the next input segment once this one is consumed. */
		*in_offset += bytes;
		if (*in_offset >= params->in_segs[*in_idx].len) {
			*in_offset -= params->in_segs[*in_idx].len;
			(*in_idx)++;
		}
		submit_bio(src_bio);
	}
	/* Submit one REQ_OP_COPY_DST bio per output segment touched by @chunk. */
	for (loff_t bytes, remaining_out = chunk; remaining_out;
	     remaining_out -= bytes) {
		struct bio *dst_bio;

		dst_bio = bio_alloc(params->out_bdev, 0, REQ_OP_COPY_DST,
				    GFP_NOIO);
		if (!dst_bio)
			goto enomem;
		scoped_guard(spinlock_irqsave, &ctx->lock)
			ctx->bio_count++;
		/* Cap this bio at the end of the current output segment. */
		bytes = min(remaining_out, params->out_segs[*out_idx].len -
			    *out_offset);
		dst_bio->bi_iter.bi_size = bytes;
		dst_bio->bi_iter.bi_sector = (params->out_segs[*out_idx].pos +
					      *out_offset) >> SECTOR_SHIFT;
		dst_bio->bi_copy_ctx = ctx;
		dst_bio->bi_end_io = blkdev_req_op_copy_done;
		/* Advance to the next output segment once this one is consumed. */
		*out_offset += bytes;
		if (*out_offset >= params->out_segs[*out_idx].len) {
			*out_offset -= params->out_segs[*out_idx].len;
			(*out_idx)++;
		}
		submit_bio(dst_bio);
	}

dec_bio_count:
	/*
	 * Drop the submission-phase reference taken at initialization. If all
	 * translation completions already happened, finish translation here.
	 */
	scoped_guard(spinlock_irqsave, &ctx->lock)
		bio_count = --ctx->bio_count;
	if (bio_count == 0)
		ctx->translation_complete(ctx);
	return chunk;

enomem:
	/* Partial submission: record the failure and let the submitted bios
	 * complete through the normal path. */
	scoped_guard(spinlock_irqsave, &ctx->lock)
		if (!ctx->status)
			ctx->status = BLK_STS_RESOURCE;
	chunk = -ENOMEM;
	goto dec_bio_count;

free_ctx:
	/* No bio was submitted, so nothing else references @ctx. */
	kfree(ctx);
	return -ENOMEM;
}
300+
301+
/**
302+
* blkdev_copy_offload() - copy data and offload copying if possible.
303+
* @params: Source and destination block device, data ranges and completion
304+
* callback.
305+
*
306+
* If @params->end_io != NULL, data is copied asynchronously. If @params->end_io
307+
* == NULL, this function only returns after data copying finished.
308+
*
309+
* Return: 0 upon success; -EIOCBQUEUED if the completion callback function will
310+
* be called or has already been called; -EOPNOTSUPP if copy offloading is
311+
* not supported by the block device or if the source or destination
312+
* address ranges span more than one dm device.
313+
*/
314+
int blkdev_copy_offload(struct blk_copy_params *params)
315+
{
316+
loff_t in_offset = 0, out_offset = 0;
317+
u32 in_idx = 0, out_idx = 0;
318+
loff_t len, chunk, max_chunk;
319+
int ret;
320+
321+
might_sleep();
322+
323+
if (!params->end_io)
324+
return blkdev_copy_sync(params);
325+
326+
spin_lock_init(&params->lock);
327+
328+
if (!bdev_max_copy_sectors(params->in_bdev) ||
329+
!bdev_max_copy_sectors(params->out_bdev))
330+
return -EOPNOTSUPP;
331+
332+
ret = blkdev_copy_check_params(params, &len);
333+
if (ret)
334+
return ret;
335+
336+
params->len = len;
337+
338+
max_chunk = (u64)min(bdev_max_copy_sectors(params->in_bdev),
339+
bdev_max_copy_sectors(params->out_bdev))
340+
<< SECTOR_SHIFT;
341+
342+
atomic_set(&params->copy_ctx_count, 1);
343+
344+
for (loff_t offset = 0; offset < len; offset += chunk) {
345+
chunk = min(len - offset, max_chunk);
346+
chunk = blkdev_copy_chunk(params, &in_idx, &out_idx, &in_offset,
347+
&out_offset, chunk);
348+
}
349+
350+
if (atomic_dec_and_test(&params->copy_ctx_count))
351+
params->end_io(params);
352+
353+
return -EIOCBQUEUED;
354+
}
355+
EXPORT_SYMBOL_GPL(blkdev_copy_offload);

0 commit comments

Comments
 (0)