|
12 | 12 | #include <sys/mman.h> |
13 | 13 |
|
14 | 14 | #include <uapi/linux/types.h> |
| 15 | +#include <linux/iommufd.h> |
15 | 16 | #include <linux/limits.h> |
16 | 17 | #include <linux/mman.h> |
| 18 | +#include <linux/overflow.h> |
17 | 19 | #include <linux/types.h> |
18 | 20 | #include <linux/vfio.h> |
19 | | -#include <linux/iommufd.h> |
20 | 21 |
|
21 | 22 | #include "../../../kselftest.h" |
22 | 23 | #include <vfio_util.h> |
|
29 | 30 | VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \ |
30 | 31 | } while (0) |
31 | 32 |
|
| 33 | +static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz, |
| 34 | + u32 *cap_offset) |
| 35 | +{ |
| 36 | + struct vfio_info_cap_header *hdr; |
| 37 | + |
| 38 | + if (!*cap_offset) |
| 39 | + return NULL; |
| 40 | + |
| 41 | + VFIO_ASSERT_LT(*cap_offset, bufsz); |
| 42 | + VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr)); |
| 43 | + |
| 44 | + hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset); |
| 45 | + *cap_offset = hdr->next; |
| 46 | + |
| 47 | + return hdr; |
| 48 | +} |
| 49 | + |
| 50 | +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info, |
| 51 | + u16 cap_id) |
| 52 | +{ |
| 53 | + struct vfio_info_cap_header *hdr; |
| 54 | + u32 cap_offset = info->cap_offset; |
| 55 | + u32 max_depth; |
| 56 | + u32 depth = 0; |
| 57 | + |
| 58 | + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) |
| 59 | + return NULL; |
| 60 | + |
| 61 | + if (cap_offset) |
| 62 | + VFIO_ASSERT_GE(cap_offset, sizeof(*info)); |
| 63 | + |
| 64 | + max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr); |
| 65 | + |
| 66 | + while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) { |
| 67 | + depth++; |
| 68 | + VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n"); |
| 69 | + |
| 70 | + if (hdr->id == cap_id) |
| 71 | + return hdr; |
| 72 | + } |
| 73 | + |
| 74 | + return NULL; |
| 75 | +} |
| 76 | + |
| 77 | +/* Return buffer including capability chain, if present. Free with free() */ |
| 78 | +static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device) |
| 79 | +{ |
| 80 | + struct vfio_iommu_type1_info *info; |
| 81 | + |
| 82 | + info = malloc(sizeof(*info)); |
| 83 | + VFIO_ASSERT_NOT_NULL(info); |
| 84 | + |
| 85 | + *info = (struct vfio_iommu_type1_info) { |
| 86 | + .argsz = sizeof(*info), |
| 87 | + }; |
| 88 | + |
| 89 | + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); |
| 90 | + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); |
| 91 | + |
| 92 | + info = realloc(info, info->argsz); |
| 93 | + VFIO_ASSERT_NOT_NULL(info); |
| 94 | + |
| 95 | + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); |
| 96 | + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); |
| 97 | + |
| 98 | + return info; |
| 99 | +} |
| 100 | + |
| 101 | +/* |
| 102 | + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to |
| 103 | + * report iommufd's iommu_iova_range. Free with free(). |
| 104 | + */ |
| 105 | +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device, |
| 106 | + u32 *nranges) |
| 107 | +{ |
| 108 | + struct vfio_iommu_type1_info_cap_iova_range *cap_range; |
| 109 | + struct vfio_iommu_type1_info *info; |
| 110 | + struct vfio_info_cap_header *hdr; |
| 111 | + struct iommu_iova_range *ranges = NULL; |
| 112 | + |
| 113 | + info = vfio_iommu_get_info(device); |
| 114 | + hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); |
| 115 | + VFIO_ASSERT_NOT_NULL(hdr); |
| 116 | + |
| 117 | + cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header); |
| 118 | + VFIO_ASSERT_GT(cap_range->nr_iovas, 0); |
| 119 | + |
| 120 | + ranges = calloc(cap_range->nr_iovas, sizeof(*ranges)); |
| 121 | + VFIO_ASSERT_NOT_NULL(ranges); |
| 122 | + |
| 123 | + for (u32 i = 0; i < cap_range->nr_iovas; i++) { |
| 124 | + ranges[i] = (struct iommu_iova_range){ |
| 125 | + .start = cap_range->iova_ranges[i].start, |
| 126 | + .last = cap_range->iova_ranges[i].end, |
| 127 | + }; |
| 128 | + } |
| 129 | + |
| 130 | + *nranges = cap_range->nr_iovas; |
| 131 | + |
| 132 | + free(info); |
| 133 | + return ranges; |
| 134 | +} |
| 135 | + |
| 136 | +/* Return iova ranges of the device's IOAS. Free with free() */ |
| 137 | +static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device, |
| 138 | + u32 *nranges) |
| 139 | +{ |
| 140 | + struct iommu_iova_range *ranges; |
| 141 | + int ret; |
| 142 | + |
| 143 | + struct iommu_ioas_iova_ranges query = { |
| 144 | + .size = sizeof(query), |
| 145 | + .ioas_id = device->ioas_id, |
| 146 | + }; |
| 147 | + |
| 148 | + ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); |
| 149 | + VFIO_ASSERT_EQ(ret, -1); |
| 150 | + VFIO_ASSERT_EQ(errno, EMSGSIZE); |
| 151 | + VFIO_ASSERT_GT(query.num_iovas, 0); |
| 152 | + |
| 153 | + ranges = calloc(query.num_iovas, sizeof(*ranges)); |
| 154 | + VFIO_ASSERT_NOT_NULL(ranges); |
| 155 | + |
| 156 | + query.allowed_iovas = (uintptr_t)ranges; |
| 157 | + |
| 158 | + ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); |
| 159 | + *nranges = query.num_iovas; |
| 160 | + |
| 161 | + return ranges; |
| 162 | +} |
| 163 | + |
/* qsort() comparator: orders iova ranges by ascending start address. */
static int iova_range_comp(const void *a, const void *b)
{
	const struct iommu_iova_range *ra = a, *rb = b;

	/* Branchless three-way compare: -1, 0, or 1. */
	return (ra->start > rb->start) - (ra->start < rb->start);
}
| 176 | + |
| 177 | +/* Return sorted IOVA ranges of the device. Free with free(). */ |
| 178 | +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, |
| 179 | + u32 *nranges) |
| 180 | +{ |
| 181 | + struct iommu_iova_range *ranges; |
| 182 | + |
| 183 | + if (device->iommufd) |
| 184 | + ranges = iommufd_iova_ranges(device, nranges); |
| 185 | + else |
| 186 | + ranges = vfio_iommu_iova_ranges(device, nranges); |
| 187 | + |
| 188 | + if (!ranges) |
| 189 | + return NULL; |
| 190 | + |
| 191 | + VFIO_ASSERT_GT(*nranges, 0); |
| 192 | + |
| 193 | + /* Sort and check that ranges are sane and non-overlapping */ |
| 194 | + qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp); |
| 195 | + VFIO_ASSERT_LT(ranges[0].start, ranges[0].last); |
| 196 | + |
| 197 | + for (u32 i = 1; i < *nranges; i++) { |
| 198 | + VFIO_ASSERT_LT(ranges[i].start, ranges[i].last); |
| 199 | + VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start); |
| 200 | + } |
| 201 | + |
| 202 | + return ranges; |
| 203 | +} |
| 204 | + |
| 205 | +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device) |
| 206 | +{ |
| 207 | + struct iova_allocator *allocator; |
| 208 | + struct iommu_iova_range *ranges; |
| 209 | + u32 nranges; |
| 210 | + |
| 211 | + ranges = vfio_pci_iova_ranges(device, &nranges); |
| 212 | + VFIO_ASSERT_NOT_NULL(ranges); |
| 213 | + |
| 214 | + allocator = malloc(sizeof(*allocator)); |
| 215 | + VFIO_ASSERT_NOT_NULL(allocator); |
| 216 | + |
| 217 | + *allocator = (struct iova_allocator){ |
| 218 | + .ranges = ranges, |
| 219 | + .nranges = nranges, |
| 220 | + .range_idx = 0, |
| 221 | + .range_offset = 0, |
| 222 | + }; |
| 223 | + |
| 224 | + return allocator; |
| 225 | +} |
| 226 | + |
/*
 * Release an allocator created by iova_allocator_init(): frees the backing
 * range array first, then the allocator itself. The pointer is invalid
 * afterwards.
 */
void iova_allocator_cleanup(struct iova_allocator *allocator)
{
	free(allocator->ranges);
	free(allocator);
}
| 232 | + |
/*
 * Allocate an IOVA region of @size bytes, naturally aligned to @size.
 * @size must be a non-zero power of two. This is a bump allocator: ranges
 * are consumed in sorted order, freed IOVAs are never reused, and the
 * test is aborted (via assert) once all ranges are exhausted.
 */
iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
{
	VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
	VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");

	for (;;) {
		struct iommu_iova_range *range;
		iova_t iova, last;

		VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
			       "IOVA allocator out of space\n");

		range = &allocator->ranges[allocator->range_idx];
		iova = range->start + allocator->range_offset;

		/* Check for sufficient space at the current offset */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		/*
		 * Align iova to size: last == iova + size - 1, so masking off
		 * the low bits rounds iova *up* to the next size boundary.
		 */
		iova = last & ~(size - 1);

		/* Check for sufficient space at the aligned iova */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		/* Advance the bump pointer; move on when the range is spent. */
		if (last == range->last) {
			allocator->range_idx++;
			allocator->range_offset = 0;
		} else {
			allocator->range_offset = last - range->start + 1;
		}

		return iova;

next_range:
		/* Current range can't satisfy the request; try the next one. */
		allocator->range_idx++;
		allocator->range_offset = 0;
	}
}
| 275 | + |
32 | 276 | iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) |
33 | 277 | { |
34 | 278 | struct vfio_dma_region *region; |
|
0 commit comments