diff --git a/block/bio.c b/block/bio.c
index 3b371a5da159e..44c43b9703875 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -981,7 +981,7 @@ void __bio_add_page(struct bio *bio, struct page *page,
 	WARN_ON_ONCE(bio_full(bio, len));
 
 	if (is_pci_p2pdma_page(page))
-		bio->bi_opf |= REQ_P2PDMA | REQ_NOMERGE;
+		bio->bi_opf |= REQ_NOMERGE;
 
 	bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, off);
 	bio->bi_iter.bi_size += len;
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index ad283017caef2..faa36ff6465ee 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -2,37 +2,41 @@
 /*
  * Copyright (C) 2025 Christoph Hellwig
  */
+#include <linux/blk-integrity.h>
 #include <linux/blk-mq-dma.h>
 #include "blk.h"
 
-struct phys_vec {
-	phys_addr_t paddr;
-	u32 len;
-};
+static bool __blk_map_iter_next(struct blk_map_iter *iter)
+{
+	if (iter->iter.bi_size)
+		return true;
+	if (!iter->bio || !iter->bio->bi_next)
+		return false;
 
-static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
-		struct phys_vec *vec)
+	iter->bio = iter->bio->bi_next;
+	if (iter->is_integrity) {
+		iter->iter = iter->bio->bi_integrity->bip_iter;
+		iter->bvec = iter->bio->bi_integrity->bip_vec;
+	} else {
+		iter->iter = iter->bio->bi_iter;
+		iter->bvec = iter->bio->bi_io_vec;
+	}
+	return true;
+}
+
+static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter)
 {
 	unsigned int max_size;
 	struct bio_vec bv;
 
-	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
-		if (!iter->bio)
-			return false;
-		vec->paddr = bvec_phys(&req->special_vec);
-		vec->len = req->special_vec.bv_len;
-		iter->bio = NULL;
-		return true;
-	}
-
 	if (!iter->iter.bi_size)
 		return false;
 
-	bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
-	vec->paddr = bvec_phys(&bv);
-	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
+	bv = mp_bvec_iter_bvec(iter->bvec, iter->iter);
+	iter->paddr = bvec_phys(&bv);
+	max_size = get_max_segment_size(&req->q->limits, iter->paddr, UINT_MAX);
 	bv.bv_len = min(bv.bv_len, max_size);
-	bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
+	bvec_iter_advance_single(iter->bvec, &iter->iter, bv.bv_len);
 
 	/*
 	 * If we are entirely done with this bi_io_vec entry, check if the next
@@ -42,23 +46,19 @@ static bool blk_map_iter_next(struct request *req, struct req_iterator *iter, struct phys_vec *vec)
 	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
 		struct bio_vec next;
 
-		if (!iter->iter.bi_size) {
-			if (!iter->bio->bi_next)
-				break;
-			iter->bio = iter->bio->bi_next;
-			iter->iter = iter->bio->bi_iter;
-		}
+		if (!__blk_map_iter_next(iter))
+			break;
 
-		next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+		next = mp_bvec_iter_bvec(iter->bvec, iter->iter);
 		if (bv.bv_len + next.bv_len > max_size ||
 		    !biovec_phys_mergeable(req->q, &bv, &next))
 			break;
 
 		bv.bv_len += next.bv_len;
-		bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
+		bvec_iter_advance_single(iter->bvec, &iter->iter, next.bv_len);
 	}
 
-	vec->len = bv.bv_len;
+	iter->len = bv.bv_len;
 	return true;
 }
 
@@ -77,29 +77,29 @@ static inline bool blk_can_dma_map_iova(struct request *req,
 			dma_get_merge_boundary(dma_dev));
 }
 
-static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
+static bool blk_dma_map_bus(struct blk_dma_iter *iter)
 {
-	iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
-	iter->len = vec->len;
+	iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, iter->iter.paddr);
+	iter->len = iter->iter.len;
 	return true;
 }
 
 static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
-		struct blk_dma_iter *iter, struct phys_vec *vec)
+		struct blk_dma_iter *iter)
 {
-	iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
-			offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
+	iter->addr = dma_map_page(dma_dev, phys_to_page(iter->iter.paddr),
+			offset_in_page(iter->iter.paddr), iter->iter.len,
+			rq_dma_dir(req));
 	if (dma_mapping_error(dma_dev, iter->addr)) {
 		iter->status = BLK_STS_RESOURCE;
 		return false;
 	}
-	iter->len = vec->len;
+	iter->len = iter->iter.len;
 	return true;
 }
 
 static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
-		struct dma_iova_state *state, struct blk_dma_iter *iter,
-		struct phys_vec *vec)
+		struct dma_iova_state *state, struct blk_dma_iter *iter)
 {
 	enum dma_data_direction dir = rq_dma_dir(req);
 	unsigned int mapped = 0;
@@ -109,12 +109,12 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
 	iter->len = dma_iova_size(state);
 
 	do {
-		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
-				vec->len, dir, 0);
+		error = dma_iova_link(dma_dev, state, iter->iter.paddr, mapped,
+				iter->iter.len, dir, 0);
 		if (error)
 			break;
-		mapped += vec->len;
-	} while (blk_map_iter_next(req, &iter->iter, vec));
+		mapped += iter->iter.len;
+	} while (blk_map_iter_next(req, &iter->iter));
 
 	error = dma_iova_sync(dma_dev, state, 0, mapped);
 	if (error) {
@@ -125,6 +125,66 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
 	return true;
 }
 
+static struct blk_map_iter blk_rq_map_iter(struct request *rq)
+{
+	struct bio *bio = rq->bio;
+
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
+		return (struct blk_map_iter) {
+			.bvec = &rq->special_vec,
+			.iter = {
+				.bi_size = rq->special_vec.bv_len,
+			}
+		};
+	}
+
+	/* the internal flush request may not have bio attached */
+	if (!bio)
+		return (struct blk_map_iter) {};
+
+	return (struct blk_map_iter) {
+		.bio = bio,
+		.bvec = bio->bi_io_vec,
+		.iter = bio->bi_iter,
+	};
+}
+
+static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
+		struct dma_iova_state *state, struct blk_dma_iter *iter,
+		unsigned int total_len)
+{
+	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
+	iter->status = BLK_STS_OK;
+
+	/*
+	 * Grab the first segment ASAP because we'll need it to check for P2P
+	 * transfers.
+	 */
+	if (!blk_map_iter_next(req, &iter->iter))
+		return false;
+
+	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
+				 phys_to_page(iter->iter.paddr))) {
+	case PCI_P2PDMA_MAP_BUS_ADDR:
+		return blk_dma_map_bus(iter);
+	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+		/*
+		 * P2P transfers through the host bridge are treated the
+		 * same as non-P2P transfers below and during unmap.
+		 */
+	case PCI_P2PDMA_MAP_NONE:
+		break;
+	default:
+		iter->status = BLK_STS_INVAL;
+		return false;
+	}
+
+	if (blk_can_dma_map_iova(req, dma_dev) &&
+	    dma_iova_try_alloc(dma_dev, state, iter->iter.paddr, total_len))
+		return blk_rq_dma_map_iova(req, dma_dev, state, iter);
+	return blk_dma_map_direct(req, dma_dev, iter);
+}
+
 /**
  * blk_rq_dma_map_iter_start - map the first DMA segment for a request
  * @req: request to map
@@ -150,43 +210,9 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
 bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
 		struct dma_iova_state *state, struct blk_dma_iter *iter)
 {
-	unsigned int total_len = blk_rq_payload_bytes(req);
-	struct phys_vec vec;
-
-	iter->iter.bio = req->bio;
-	iter->iter.iter = req->bio->bi_iter;
-	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
-	iter->status = BLK_STS_OK;
-
-	/*
-	 * Grab the first segment ASAP because we'll need it to check for P2P
-	 * transfers.
-	 */
-	if (!blk_map_iter_next(req, &iter->iter, &vec))
-		return false;
-
-	if (IS_ENABLED(CONFIG_PCI_P2PDMA) && (req->cmd_flags & REQ_P2PDMA)) {
-		switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
-					 phys_to_page(vec.paddr))) {
-		case PCI_P2PDMA_MAP_BUS_ADDR:
-			return blk_dma_map_bus(iter, &vec);
-		case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
-			/*
-			 * P2P transfers through the host bridge are treated the
-			 * same as non-P2P transfers below and during unmap.
-			 */
-			req->cmd_flags &= ~REQ_P2PDMA;
-			break;
-		default:
-			iter->status = BLK_STS_INVAL;
-			return false;
-		}
-	}
-
-	if (blk_can_dma_map_iova(req, dma_dev) &&
-	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
-		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
-	return blk_dma_map_direct(req, dma_dev, iter, &vec);
+	iter->iter = blk_rq_map_iter(req);
+	return blk_dma_map_iter_start(req, dma_dev, state, iter,
+			blk_rq_payload_bytes(req));
 }
 EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);
 
@@ -211,17 +237,47 @@ EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);
 bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
 		struct dma_iova_state *state, struct blk_dma_iter *iter)
 {
-	struct phys_vec vec;
-
-	if (!blk_map_iter_next(req, &iter->iter, &vec))
+	if (!blk_map_iter_next(req, &iter->iter))
 		return false;
 
 	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
-		return blk_dma_map_bus(iter, &vec);
-	return blk_dma_map_direct(req, dma_dev, iter, &vec);
+		return blk_dma_map_bus(iter);
+	return blk_dma_map_direct(req, dma_dev, iter);
 }
 EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+		struct device *dma_dev, struct dma_iova_state *state,
+		struct blk_dma_iter *iter)
+{
+	unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
+			blk_rq_sectors(req));
+	struct bio *bio = req->bio;
+
+	iter->iter = (struct blk_map_iter) {
+		.bio = bio,
+		.iter = bio->bi_integrity->bip_iter,
+		.bvec = bio->bi_integrity->bip_vec,
+		.is_integrity = true,
+	};
+	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
+}
+EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);
+
+bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+		struct device *dma_dev, struct blk_dma_iter *iter)
+{
+	if (!blk_map_iter_next(req, &iter->iter))
+		return false;
+
+	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
+		return blk_dma_map_bus(iter);
+	return blk_dma_map_direct(req, dma_dev, iter);
+}
+EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);
+#endif
+
 static inline struct scatterlist *
 blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
 {
@@ -246,20 +302,13 @@ blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
 int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
 		struct scatterlist **last_sg)
 {
-	struct req_iterator iter = {
-		.bio	= rq->bio,
-	};
-	struct phys_vec vec;
+	struct blk_map_iter iter = blk_rq_map_iter(rq);
 	int nsegs = 0;
 
-	/* the internal flush request may not have bio attached */
-	if (iter.bio)
-		iter.iter = iter.bio->bi_iter;
-
-	while (blk_map_iter_next(rq, &iter, &vec)) {
+	while (blk_map_iter_next(rq, &iter)) {
 		*last_sg = blk_next_sg(last_sg, sglist);
-		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
-				offset_in_page(vec.paddr));
+		sg_set_page(*last_sg, phys_to_page(iter.paddr), iter.len,
+				offset_in_page(iter.paddr));
 		nsegs++;
 	}
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2c6d9506b1725..4a23729f399ac 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -172,9 +172,7 @@ struct nvme_dev {
 	u32 last_ps;
 	bool hmb;
 	struct sg_table *hmb_sgt;
-
 	mempool_t *dmavec_mempool;
-	mempool_t *iod_meta_mempool;
 
 	/* shadow doorbell buffer support: */
 	__le32 *dbbuf_dbs;
@@ -261,6 +259,15 @@ enum nvme_iod_flags {
 
 	/* single segment dma mapping */
 	IOD_SINGLE_SEGMENT = 1U << 2,
+
+	/* DMA mapped with PCI_P2PDMA_MAP_BUS_ADDR */
+	IOD_P2P_BUS_ADDR = 1U << 3,
+
+	/* Metadata DMA mapped with PCI_P2PDMA_MAP_BUS_ADDR */
+	IOD_META_P2P_BUS_ADDR = 1U << 4,
+
+	/* Metadata using non-coalesced MPTR */
+	IOD_META_MPTR = 1U << 5,
 };
 
 struct nvme_dma_vec {
@@ -278,13 +285,14 @@ struct nvme_iod {
 	u8 nr_descriptors;
 
 	unsigned int total_len;
+	unsigned int meta_total_len;
 	struct dma_iova_state dma_state;
+	struct dma_iova_state meta_dma_state;
 	void *descriptors[NVME_MAX_NR_DESCRIPTORS];
 	struct nvme_dma_vec *dma_vecs;
 	unsigned int nr_dma_vecs;
 
 	dma_addr_t meta_dma;
-	struct sg_table meta_sgt;
 	struct nvme_sgl_desc *meta_descriptor;
 };
 
@@ -641,6 +649,11 @@ static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq,
 	return nvmeq->descriptor_pools.large;
 }
 
+static inline bool nvme_pci_cmd_use_meta_sgl(struct nvme_command *cmd)
+{
+	return (cmd->common.flags & NVME_CMD_SGL_ALL) == NVME_CMD_SGL_METASEG;
+}
+
 static inline bool nvme_pci_cmd_use_sgl(struct nvme_command *cmd)
 {
 	return cmd->common.flags &
@@ -690,25 +703,34 @@ static void nvme_free_prps(struct request *req)
 	mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool);
 }
 
+
+static void __nvme_free_sgls(struct device *dma_dev, struct nvme_sgl_desc *sge,
+		struct nvme_sgl_desc *sg_list, enum dma_data_direction dir)
+{
+	unsigned int len = le32_to_cpu(sge->length);
+	unsigned int i, nr_entries;
+
+	if (sge->type == (NVME_SGL_FMT_DATA_DESC << 4)) {
+		dma_unmap_page(dma_dev, le64_to_cpu(sge->addr), len, dir);
+		return;
+	}
+
+	nr_entries = len / sizeof(*sg_list);
+	for (i = 0; i < nr_entries; i++)
+		dma_unmap_page(dma_dev, le64_to_cpu(sg_list[i].addr),
+			le32_to_cpu(sg_list[i].length), dir);
+}
+
 static void nvme_free_sgls(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 	struct device *dma_dev = nvmeq->dev->dev;
-	dma_addr_t sqe_dma_addr = le64_to_cpu(iod->cmd.common.dptr.sgl.addr);
-	unsigned int sqe_dma_len = le32_to_cpu(iod->cmd.common.dptr.sgl.length);
 	struct nvme_sgl_desc *sg_list = iod->descriptors[0];
+	struct nvme_sgl_desc *sge = &iod->cmd.common.dptr.sgl;
 	enum dma_data_direction dir = rq_dma_dir(req);
 
-	if (iod->nr_descriptors) {
-		unsigned int nr_entries = sqe_dma_len / sizeof(*sg_list), i;
-
-		for (i = 0; i < nr_entries; i++)
-			dma_unmap_page(dma_dev, le64_to_cpu(sg_list[i].addr),
-				le32_to_cpu(sg_list[i].length), dir);
-	} else {
-		dma_unmap_page(dma_dev, sqe_dma_addr, sqe_dma_len, dir);
-	}
+	__nvme_free_sgls(dma_dev, sge, sg_list, dir);
 }
 
 static void nvme_unmap_data(struct request *req)
@@ -725,7 +747,8 @@ static void nvme_unmap_data(struct request *req)
 		return;
 	}
 
-	if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) {
+	if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len,
+			iod->flags & IOD_P2P_BUS_ADDR)) {
 		if (nvme_pci_cmd_use_sgl(&iod->cmd))
 			nvme_free_sgls(req);
 		else
@@ -1000,6 +1023,9 @@ static blk_status_t nvme_map_data(struct request *req)
 	if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter))
 		return iter.status;
 
+	if (iter.p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
+		iod->flags |= IOD_P2P_BUS_ADDR;
+
 	if (use_sgl == SGL_FORCED ||
 	    (use_sgl == SGL_SUPPORTED &&
 	     (sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold)))
@@ -1007,70 +1033,96 @@ static blk_status_t nvme_map_data(struct request *req)
 	return nvme_pci_setup_data_prp(req, &iter);
 }
 
-static void nvme_pci_sgl_set_data_sg(struct nvme_sgl_desc *sge,
-		struct scatterlist *sg)
+static void nvme_free_meta_sgls(struct nvme_iod *iod, struct device *dma_dev,
+		enum dma_data_direction dir)
 {
-	sge->addr = cpu_to_le64(sg_dma_address(sg));
-	sge->length = cpu_to_le32(sg_dma_len(sg));
-	sge->type = NVME_SGL_FMT_DATA_DESC << 4;
+	struct nvme_sgl_desc *sge = iod->meta_descriptor;
+
+	__nvme_free_sgls(dma_dev, sge, &sge[1], dir);
+}
+
+static void nvme_unmap_metadata(struct request *req)
+{
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+	enum dma_data_direction dir = rq_dma_dir(req);
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct device *dma_dev = nvmeq->dev->dev;
+
+	if (iod->flags & IOD_META_MPTR) {
+		dma_unmap_page(dma_dev, iod->meta_dma,
+				rq_integrity_vec(req).bv_len,
+				rq_dma_dir(req));
+		return;
+	}
+
+	if (!blk_rq_dma_unmap(req, dma_dev, &iod->meta_dma_state,
+			iod->meta_total_len,
+			iod->flags & IOD_META_P2P_BUS_ADDR)) {
+		if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
+			nvme_free_meta_sgls(iod, dma_dev, dir);
+		else
+			dma_unmap_page(dma_dev, iod->meta_dma,
+					iod->meta_total_len, dir);
+	}
+
+	if (iod->meta_descriptor)
+		dma_pool_free(nvmeq->descriptor_pools.small,
+				iod->meta_descriptor, iod->meta_dma);
 }
 
 static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
 {
 	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
-	struct nvme_dev *dev = nvmeq->dev;
+	unsigned int entries = req->nr_integrity_segments;
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_dev *dev = nvmeq->dev;
 	struct nvme_sgl_desc *sg_list;
-	struct scatterlist *sgl, *sg;
-	unsigned int entries;
+	struct blk_dma_iter iter;
 	dma_addr_t sgl_dma;
-	int rc, i;
+	int i = 0;
 
-	iod->meta_sgt.sgl = mempool_alloc(dev->iod_meta_mempool, GFP_ATOMIC);
-	if (!iod->meta_sgt.sgl)
-		return BLK_STS_RESOURCE;
+	if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev,
+			&iod->meta_dma_state, &iter))
+		return iter.status;
 
-	sg_init_table(iod->meta_sgt.sgl, req->nr_integrity_segments);
-	iod->meta_sgt.orig_nents = blk_rq_map_integrity_sg(req,
-			iod->meta_sgt.sgl);
-	if (!iod->meta_sgt.orig_nents)
-		goto out_free_sg;
+	if (iter.p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
+		iod->flags |= IOD_META_P2P_BUS_ADDR;
+	else if (blk_rq_dma_map_coalesce(&iod->meta_dma_state))
+		entries = 1;
 
-	rc = dma_map_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req),
-			DMA_ATTR_NO_WARN);
-	if (rc)
-		goto out_free_sg;
+	if (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD)) {
+		iod->cmd.common.metadata = cpu_to_le64(iter.addr);
+		iod->meta_total_len = iter.len;
+		iod->meta_dma = iter.addr;
+		iod->meta_descriptor = NULL;
+		return BLK_STS_OK;
+	}
 
 	sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC,
 			&sgl_dma);
 	if (!sg_list)
-		goto out_unmap_sg;
+		return BLK_STS_RESOURCE;
 
-	entries = iod->meta_sgt.nents;
 	iod->meta_descriptor = sg_list;
 	iod->meta_dma = sgl_dma;
-
 	iod->cmd.common.flags = NVME_CMD_SGL_METASEG;
 	iod->cmd.common.metadata = cpu_to_le64(sgl_dma);
-
-	sgl = iod->meta_sgt.sgl;
 	if (entries == 1) {
-		nvme_pci_sgl_set_data_sg(sg_list, sgl);
+		iod->meta_total_len = iter.len;
+		nvme_pci_sgl_set_data(sg_list, &iter);
 		return BLK_STS_OK;
 	}
 
 	sgl_dma += sizeof(*sg_list);
-	nvme_pci_sgl_set_seg(sg_list, sgl_dma, entries);
-	for_each_sg(sgl, sg, entries, i)
-		nvme_pci_sgl_set_data_sg(&sg_list[i + 1], sg);
-
-	return BLK_STS_OK;
+	do {
+		nvme_pci_sgl_set_data(&sg_list[++i], &iter);
+		iod->meta_total_len += iter.len;
+	} while (blk_rq_integrity_dma_map_iter_next(req, dev->dev, &iter));
 
-out_unmap_sg:
-	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
-out_free_sg:
-	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
-	return BLK_STS_RESOURCE;
+	nvme_pci_sgl_set_seg(sg_list, sgl_dma, i);
+	if (unlikely(iter.status))
+		nvme_unmap_metadata(req);
+	return iter.status;
 }
 
 static blk_status_t nvme_pci_setup_meta_mptr(struct request *req)
@@ -1083,6 +1135,7 @@ static blk_status_t nvme_pci_setup_meta_mptr(struct request *req)
 	if (dma_mapping_error(nvmeq->dev->dev, iod->meta_dma))
 		return BLK_STS_IOERR;
 	iod->cmd.common.metadata = cpu_to_le64(iod->meta_dma);
+	iod->flags |= IOD_META_MPTR;
 	return BLK_STS_OK;
 }
 
@@ -1104,7 +1157,6 @@ static blk_status_t nvme_prep_rq(struct request *req)
 	iod->flags = 0;
 	iod->nr_descriptors = 0;
 	iod->total_len = 0;
-	iod->meta_sgt.nents = 0;
 
 	ret = nvme_setup_cmd(req->q->queuedata, req);
 	if (ret)
@@ -1215,25 +1267,6 @@ static void nvme_queue_rqs(struct rq_list *rqlist)
 	*rqlist = requeue_list;
 }
 
-static __always_inline void nvme_unmap_metadata(struct request *req)
-{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
-	struct nvme_dev *dev = nvmeq->dev;
-
-	if (!iod->meta_sgt.nents) {
-		dma_unmap_page(dev->dev, iod->meta_dma,
-			       rq_integrity_vec(req).bv_len,
-			       rq_dma_dir(req));
-		return;
-	}
-
-	dma_pool_free(nvmeq->descriptor_pools.small, iod->meta_descriptor,
-		      iod->meta_dma);
-	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
-	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
-}
-
 static __always_inline void nvme_pci_unmap_rq(struct request *req)
 {
 	if (blk_integrity_rq(req))
@@ -3039,7 +3072,6 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
 
 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 {
-	size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
 	size_t alloc_size = sizeof(struct nvme_dma_vec) * NVME_MAX_SEGS;
 
 	dev->dmavec_mempool = mempool_create_node(1,
@@ -3048,17 +3080,7 @@ static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 			dev_to_node(dev->dev));
 	if (!dev->dmavec_mempool)
 		return -ENOMEM;
-
-	dev->iod_meta_mempool = mempool_create_node(1,
-			mempool_kmalloc, mempool_kfree,
-			(void *)meta_size, GFP_KERNEL,
-			dev_to_node(dev->dev));
-	if (!dev->iod_meta_mempool)
-		goto free;
 	return 0;
-free:
-	mempool_destroy(dev->dmavec_mempool);
-	return -ENOMEM;
 }
 
 static void nvme_free_tagset(struct nvme_dev *dev)
@@ -3508,7 +3530,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	nvme_free_queues(dev, 0);
 out_release_iod_mempool:
 	mempool_destroy(dev->dmavec_mempool);
-	mempool_destroy(dev->iod_meta_mempool);
 out_dev_unmap:
 	nvme_dev_unmap(dev);
 out_uninit_ctrl:
@@ -3572,7 +3593,6 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_dbbuf_dma_free(dev);
 	nvme_free_queues(dev, 0);
 	mempool_destroy(dev->dmavec_mempool);
-	mempool_destroy(dev->iod_meta_mempool);
 	nvme_release_descriptor_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_uninit_ctrl(&dev->ctrl);
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index e67a2b6e8f111..78fe2459e6612 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -4,6 +4,7 @@
 
 #include <linux/blk-mq.h>
 #include <linux/bio-integrity.h>
+#include <linux/blk-mq-dma.h>
 
 struct request;
 
@@ -31,6 +32,11 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
 		ssize_t bytes);
 int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
 		struct logical_block_metadata_cap __user *argp);
+bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+		struct device *dma_dev, struct dma_iova_state *state,
+		struct blk_dma_iter *iter);
+bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+		struct device *dma_dev, struct blk_dma_iter *iter);
 
 static inline bool
 blk_integrity_queue_supports_integrity(struct request_queue *q)
@@ -115,6 +121,17 @@ static inline int blk_rq_integrity_map_user(struct request *rq,
 {
 	return -EINVAL;
 }
+static inline bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+		struct device *dma_dev, struct dma_iova_state *state,
+		struct blk_dma_iter *iter)
+{
+	return false;
+}
+static inline bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+		struct device *dma_dev, struct blk_dma_iter *iter)
+{
+	return false;
+}
 static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
 {
 	return NULL;
diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h
index c26a01aeae006..4b7b85f9e23e6 100644
--- a/include/linux/blk-mq-dma.h
+++ b/include/linux/blk-mq-dma.h
@@ -5,6 +5,15 @@
 #include <linux/blk-mq.h>
 #include <linux/pci-p2pdma.h>
 
+struct blk_map_iter {
+	phys_addr_t paddr;
+	u32 len;
+	struct bio_vec *bvec;
+	struct bvec_iter iter;
+	struct bio *bio;
+	bool is_integrity;
+};
+
 struct blk_dma_iter {
 	/* Output address range for this iteration */
 	dma_addr_t addr;
@@ -14,7 +23,7 @@ struct blk_dma_iter {
 	blk_status_t status;
 
 	/* Internal to blk_rq_dma_map_iter_* */
-	struct req_iterator iter;
+	struct blk_map_iter iter;
 	struct pci_p2pdma_map_state p2pdma;
 };
 
@@ -41,14 +50,15 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
  * @dma_dev: device to unmap from
  * @state: DMA IOVA state
  * @mapped_len: number of bytes to unmap
+ * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
  *
  * Returns %false if the callers need to manually unmap every DMA segment
  * mapped using @iter or %true if no work is left to be done.
  */
 static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
-		struct dma_iova_state *state, size_t mapped_len)
+		struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
 {
-	if (req->cmd_flags & REQ_P2PDMA)
+	if (is_p2p)
 		return true;
 
 	if (dma_use_iova(state)) {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 09b99d52fd365..0a29b20939d17 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -419,7 +419,6 @@ enum req_flag_bits {
 #define REQ_DRV		(__force blk_opf_t)(1ULL << __REQ_DRV)
 #define REQ_FS_PRIVATE	(__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
 #define REQ_ATOMIC	(__force blk_opf_t)(1ULL << __REQ_ATOMIC)
-#define REQ_P2PDMA	(__force blk_opf_t)(1ULL << __REQ_P2PDMA)
 
 #define REQ_NOUNMAP	(__force blk_opf_t)(1ULL << __REQ_NOUNMAP)