Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion block/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,7 @@ void __bio_add_page(struct bio *bio, struct page *page,
WARN_ON_ONCE(bio_full(bio, len));

if (is_pci_p2pdma_page(page))
bio->bi_opf |= REQ_P2PDMA | REQ_NOMERGE;
bio->bi_opf |= REQ_NOMERGE;

bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, off);
bio->bi_iter.bi_size += len;
Expand Down
237 changes: 143 additions & 94 deletions block/blk-mq-dma.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,41 @@
/*
* Copyright (C) 2025 Christoph Hellwig
*/
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

struct phys_vec {
phys_addr_t paddr;
u32 len;
};
/*
 * Advance @iter to the next bio in the chained bio list once the current
 * bio's bvec iterator is fully consumed.
 *
 * Returns true if there is still payload to walk (either remaining in the
 * current bio or available in a following chained bio), false when the
 * whole chain has been consumed.
 */
static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
	/* Bytes still left in the current bio: nothing to advance. */
	if (iter->iter.bi_size)
		return true;
	if (!iter->bio || !iter->bio->bi_next)
		return false;

	iter->bio = iter->bio->bi_next;
	if (iter->is_integrity) {
		/* Walk the integrity (metadata) payload of the new bio. */
		iter->iter = iter->bio->bi_integrity->bip_iter;
		iter->bvec = iter->bio->bi_integrity->bip_vec;
	} else {
		/* Walk the regular data payload of the new bio. */
		iter->iter = iter->bio->bi_iter;
		iter->bvec = iter->bio->bi_io_vec;
	}
	return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter)
{
unsigned int max_size;
struct bio_vec bv;

if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
if (!iter->bio)
return false;
vec->paddr = bvec_phys(&req->special_vec);
vec->len = req->special_vec.bv_len;
iter->bio = NULL;
return true;
}

if (!iter->iter.bi_size)
return false;

bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
vec->paddr = bvec_phys(&bv);
max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
bv = mp_bvec_iter_bvec(iter->bvec, iter->iter);
iter->paddr = bvec_phys(&bv);
max_size = get_max_segment_size(&req->q->limits, iter->paddr, UINT_MAX);
bv.bv_len = min(bv.bv_len, max_size);
bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
bvec_iter_advance_single(iter->bvec, &iter->iter, bv.bv_len);

/*
* If we are entirely done with this bi_io_vec entry, check if the next
Expand All @@ -42,23 +46,19 @@ static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
struct bio_vec next;

if (!iter->iter.bi_size) {
if (!iter->bio->bi_next)
break;
iter->bio = iter->bio->bi_next;
iter->iter = iter->bio->bi_iter;
}
if (!__blk_map_iter_next(iter))
break;

next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
next = mp_bvec_iter_bvec(iter->bvec, iter->iter);
if (bv.bv_len + next.bv_len > max_size ||
!biovec_phys_mergeable(req->q, &bv, &next))
break;

bv.bv_len += next.bv_len;
bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
bvec_iter_advance_single(iter->bvec, &iter->iter, next.bv_len);
}

vec->len = bv.bv_len;
iter->len = bv.bv_len;
return true;
}

Expand All @@ -77,29 +77,29 @@ static inline bool blk_can_dma_map_iova(struct request *req,
dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
static bool blk_dma_map_bus(struct blk_dma_iter *iter)
{
iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
iter->len = vec->len;
iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, iter->iter.paddr);
iter->len = iter->iter.len;
return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
struct blk_dma_iter *iter, struct phys_vec *vec)
struct blk_dma_iter *iter)
{
iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
iter->addr = dma_map_page(dma_dev, phys_to_page(iter->iter.paddr),
offset_in_page(iter->iter.paddr), iter->iter.len,
rq_dma_dir(req));
if (dma_mapping_error(dma_dev, iter->addr)) {
iter->status = BLK_STS_RESOURCE;
return false;
}
iter->len = vec->len;
iter->len = iter->iter.len;
return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
struct dma_iova_state *state, struct blk_dma_iter *iter,
struct phys_vec *vec)
struct dma_iova_state *state, struct blk_dma_iter *iter)
{
enum dma_data_direction dir = rq_dma_dir(req);
unsigned int mapped = 0;
Expand All @@ -109,12 +109,12 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
iter->len = dma_iova_size(state);

do {
error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
vec->len, dir, 0);
error = dma_iova_link(dma_dev, state, iter->iter.paddr, mapped,
iter->iter.len, dir, 0);
if (error)
break;
mapped += vec->len;
} while (blk_map_iter_next(req, &iter->iter, vec));
mapped += iter->iter.len;
} while (blk_map_iter_next(req, &iter->iter));

error = dma_iova_sync(dma_dev, state, 0, mapped);
if (error) {
Expand All @@ -125,6 +125,66 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
return true;
}

/*
 * Build the initial blk_map_iter for walking the data payload of @rq.
 *
 * Requests with a driver-provided special payload (RQF_SPECIAL_PAYLOAD) are
 * described by the single special_vec rather than a bio chain.  A request
 * without any bio (e.g. an internal flush request) yields an empty iterator,
 * for which the iteration helpers immediately report completion.
 */
static struct blk_map_iter blk_rq_map_iter(struct request *rq)
{
	struct bio *bio = rq->bio;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		/*
		 * Single-segment payload: no bio, one bvec, sized by the
		 * special_vec length.
		 */
		return (struct blk_map_iter) {
			.bvec	= &rq->special_vec,
			.iter	= {
				.bi_size	= rq->special_vec.bv_len,
			}
		};
	}

	/* the internal flush request may not have bio attached */
	if (!bio)
		return (struct blk_map_iter) {};

	return (struct blk_map_iter) {
		.bio	= bio,
		.bvec	= bio->bi_io_vec,
		.iter	= bio->bi_iter,
	};
}

/*
 * Common start-of-mapping helper for both the data and integrity payload
 * iterators: fetch the first segment, classify a possible PCI P2PDMA
 * transfer, and then map the first segment either as a bus address, via a
 * single IOVA allocation, or with a direct dma_map_page().
 *
 * Returns true with the first mapping stored in @iter on success, false
 * when there is nothing to map or on error (iter->status set accordingly).
 */
static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		unsigned int total_len)
{
	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter))
		return false;

	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
				 phys_to_page(iter->iter.paddr))) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		/* Peer-to-peer via the bus: no IOMMU / host mapping needed. */
		return blk_dma_map_bus(iter);
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/*
		 * P2P transfers through the host bridge are treated the
		 * same as non-P2P transfers below and during unmap.
		 */
	case PCI_P2PDMA_MAP_NONE:
		break;
	default:
		iter->status = BLK_STS_INVAL;
		return false;
	}

	/* Prefer one contiguous IOVA for the whole payload when possible. */
	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, iter->iter.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter);
	return blk_dma_map_direct(req, dma_dev, iter);
}

/**
* blk_rq_dma_map_iter_start - map the first DMA segment for a request
* @req: request to map
Expand All @@ -150,43 +210,9 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
struct dma_iova_state *state, struct blk_dma_iter *iter)
{
unsigned int total_len = blk_rq_payload_bytes(req);
struct phys_vec vec;

iter->iter.bio = req->bio;
iter->iter.iter = req->bio->bi_iter;
memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
iter->status = BLK_STS_OK;

/*
* Grab the first segment ASAP because we'll need it to check for P2P
* transfers.
*/
if (!blk_map_iter_next(req, &iter->iter, &vec))
return false;

if (IS_ENABLED(CONFIG_PCI_P2PDMA) && (req->cmd_flags & REQ_P2PDMA)) {
switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
phys_to_page(vec.paddr))) {
case PCI_P2PDMA_MAP_BUS_ADDR:
return blk_dma_map_bus(iter, &vec);
case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
/*
* P2P transfers through the host bridge are treated the
* same as non-P2P transfers below and during unmap.
*/
req->cmd_flags &= ~REQ_P2PDMA;
break;
default:
iter->status = BLK_STS_INVAL;
return false;
}
}

if (blk_can_dma_map_iova(req, dma_dev) &&
dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
return blk_dma_map_direct(req, dma_dev, iter, &vec);
iter->iter = blk_rq_map_iter(req);
return blk_dma_map_iter_start(req, dma_dev, state, iter,
blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

Expand All @@ -211,17 +237,47 @@ EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
struct dma_iova_state *state, struct blk_dma_iter *iter)
{
struct phys_vec vec;

if (!blk_map_iter_next(req, &iter->iter, &vec))
if (!blk_map_iter_next(req, &iter->iter))
return false;

if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
return blk_dma_map_bus(iter, &vec);
return blk_dma_map_direct(req, dma_dev, iter, &vec);
return blk_dma_map_bus(iter);
return blk_dma_map_direct(req, dma_dev, iter);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);

#ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Map the first DMA segment of the integrity (metadata) payload of @req.
 * Mirrors blk_rq_dma_map_iter_start() but walks the bip_vec of each bio
 * instead of the data bvecs.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
		struct device *dma_dev, struct dma_iova_state *state,
		struct blk_dma_iter *iter)
{
	/* Total integrity bytes for the request, derived from its sectors. */
	unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
			blk_rq_sectors(req));
	struct bio *bio = req->bio;

	iter->iter = (struct blk_map_iter) {
		.bio		= bio,
		.iter		= bio->bi_integrity->bip_iter,
		.bvec		= bio->bi_integrity->bip_vec,
		.is_integrity	= true,
	};
	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/*
 * Map the next DMA segment of the integrity payload of @req.
 *
 * Returns false when the integrity payload is exhausted or on a mapping
 * error (iter->status distinguishes the two cases).
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
		struct device *dma_dev, struct blk_dma_iter *iter)
{
	if (!blk_map_iter_next(req, &iter->iter))
		return false;

	/* Non-P2P (and host-bridge P2P) segments go through the DMA API. */
	if (iter->p2pdma.map != PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_direct(req, dma_dev, iter);
	return blk_dma_map_bus(iter);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);
#endif

static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
Expand All @@ -246,20 +302,13 @@ blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
struct scatterlist **last_sg)
{
struct req_iterator iter = {
.bio = rq->bio,
};
struct phys_vec vec;
struct blk_map_iter iter = blk_rq_map_iter(rq);
int nsegs = 0;

/* the internal flush request may not have bio attached */
if (iter.bio)
iter.iter = iter.bio->bi_iter;

while (blk_map_iter_next(rq, &iter, &vec)) {
while (blk_map_iter_next(rq, &iter)) {
*last_sg = blk_next_sg(last_sg, sglist);
sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
offset_in_page(vec.paddr));
sg_set_page(*last_sg, phys_to_page(iter.paddr), iter.len,
offset_in_page(iter.paddr));
nsegs++;
}

Expand Down
Loading
Loading