Skip to content

Commit f2fc5ae

Browse files
authored
Merge pull request #410 from CodeConstruct/mi+mpr
MI: Strict response checking, add support for More Processing Required responses
2 parents 7155c48 + 5031f01 commit f2fc5ae

6 files changed

Lines changed: 680 additions & 45 deletions

File tree

src/nvme/mi-mctp.c

Lines changed: 189 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include <errno.h>
10+
#include <stdbool.h>
1011
#include <stdio.h>
1112
#include <stdlib.h>
1213
#include <unistd.h>
@@ -84,10 +85,16 @@ struct nvme_mi_transport_mctp {
8485
int sd;
8586
};
8687

88+
/*
 * Default implementation of the tag-control socket operation: passes the
 * request and its control structure straight to ioctl() on the given
 * socket and hands back ioctl()'s return value unchanged.
 */
static int ioctl_tag(int sd, unsigned long req, struct mctp_ioc_tag_ctl *ctl)
{
	int rc = ioctl(sd, req, ctl);

	return rc;
}
92+
8793
static struct __mi_mctp_socket_ops ops = {
8894
socket,
8995
sendmsg,
9096
recvmsg,
97+
ioctl_tag,
9198
};
9299

93100
void __nvme_mi_mctp_set_ops(const struct __mi_mctp_socket_ops *newops)
@@ -96,30 +103,134 @@ void __nvme_mi_mctp_set_ops(const struct __mi_mctp_socket_ops *newops)
96103
}
97104
static const struct nvme_mi_transport nvme_mi_transport_mctp;
98105

106+
#ifdef SIOCMCTPALLOCTAG
107+
static __u8 nvme_mi_mctp_tag_alloc(struct nvme_mi_ep *ep)
108+
{
109+
struct nvme_mi_transport_mctp *mctp;
110+
struct mctp_ioc_tag_ctl ctl = { 0 };
111+
static bool logged;
112+
int rc;
113+
114+
mctp = ep->transport_data;
115+
116+
ctl.peer_addr = mctp->eid;
117+
118+
errno = 0;
119+
rc = ops.ioctl_tag(mctp->sd, SIOCMCTPALLOCTAG, &ctl);
120+
if (rc) {
121+
if (!logged) {
122+
/* not necessarily fatal, just means we can't handle
123+
* "more processing required" messages */
124+
nvme_msg(ep->root, LOG_INFO,
125+
"System does not support explicit tag allocation\n");
126+
logged = true;
127+
}
128+
return MCTP_TAG_OWNER;
129+
}
130+
131+
return ctl.tag;
132+
}
133+
134+
/*
 * Release a tag obtained from nvme_mi_mctp_tag_alloc() by issuing
 * SIOCMCTPDROPTAG for this endpoint's peer.
 *
 * Tags without the MCTP_TAG_PREALLOC bit are ignored; presumably that covers
 * the MCTP_TAG_OWNER fallback returned when allocation failed (TODO confirm
 * against the MCTP tag definitions). The result of the drop request is
 * intentionally not checked: this is a best-effort cleanup.
 */
static void nvme_mi_mctp_tag_drop(struct nvme_mi_ep *ep, __u8 tag)
{
	struct nvme_mi_transport_mctp *mctp = ep->transport_data;
	struct mctp_ioc_tag_ctl ctl = { 0 };

	if (tag & MCTP_TAG_PREALLOC) {
		ctl.peer_addr = mctp->eid;
		ctl.tag = tag;

		ops.ioctl_tag(mctp->sd, SIOCMCTPDROPTAG, &ctl);
	}
}
149+
150+
#else /* !defined SIOCMCTPALLOCTAG */
151+
152+
static __u8 nvme_mi_mctp_tag_alloc(struct nvme_mi_ep *ep)
153+
{
154+
static bool logged;
155+
if (!logged) {
156+
nvme_msg(ep->root, LOG_INFO,
157+
"Build does not support explicit tag allocation\n");
158+
logged = true;
159+
}
160+
return MCTP_TAG_OWNER;
161+
}
162+
163+
/*
 * Fallback used when the build has no SIOCMCTPALLOCTAG definition: the
 * matching tag_alloc never allocates anything, so there is nothing to drop.
 */
static void nvme_mi_mctp_tag_drop(struct nvme_mi_ep *ep, __u8 tag)
{
	/* deliberately a no-op */
	(void)ep;
	(void)tag;
}
166+
167+
#endif /* !defined SIOCMCTPALLOCTAG */
168+
169+
static bool nvme_mi_mctp_resp_is_mpr(struct nvme_mi_resp *resp, size_t len)
170+
{
171+
struct nvme_mi_msg_resp *msg;
172+
__le32 mic;
173+
__u32 crc;
174+
175+
if (len != sizeof(*msg) + sizeof(mic))
176+
return false;
177+
178+
msg = (struct nvme_mi_msg_resp *)resp->hdr;
179+
180+
if (msg->status != NVME_MI_RESP_MPR)
181+
return false;
182+
183+
/* We can't use verify_resp_mic here, as the response structure has
184+
* not been laid-out properly in resp yet (this is deferred until
185+
* we have the actual response).
186+
*
187+
* We know the data is a fixed size, and linear in the hdr buf, so
188+
* calculation is fairly simple. We do need to find the MIC data
189+
* though, which could either be in the header buf (if the original
190+
* header was larger than the minimal header message), or the start of
191+
* the data buf (otherwise).
192+
*/
193+
if (resp->hdr_len > sizeof(*msg))
194+
mic = *(__le32 *)(msg + 1);
195+
else
196+
mic = *(__le32 *)(resp->data);
197+
198+
crc = ~nvme_mi_crc32_update(0xffffffff, msg, sizeof(*msg));
199+
if (le32_to_cpu(mic) != crc)
200+
return false;
201+
202+
return true;
203+
}
204+
99205
static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
100206
struct nvme_mi_req *req,
101207
struct nvme_mi_resp *resp)
102208
{
103209
struct nvme_mi_transport_mctp *mctp;
104-
struct iovec req_iov[3], resp_iov[2];
210+
struct iovec req_iov[3], resp_iov[3];
105211
struct msghdr req_msg, resp_msg;
106212
struct sockaddr_mctp addr;
107-
unsigned char *rspbuf;
108213
ssize_t len;
109214
__le32 mic;
110-
int i;
215+
int i, rc;
216+
__u8 tag;
111217

112218
if (ep->transport != &nvme_mi_transport_mctp)
113219
return -EINVAL;
114220

221+
/* we need enough space for at least a generic (/error) response */
222+
if (resp->hdr_len < sizeof(struct nvme_mi_msg_resp))
223+
return -EINVAL;
224+
115225
mctp = ep->transport_data;
226+
tag = nvme_mi_mctp_tag_alloc(ep);
116227

117228
memset(&addr, 0, sizeof(addr));
118229
addr.smctp_family = AF_MCTP;
119230
addr.smctp_network = mctp->net;
120231
addr.smctp_addr.s_addr = mctp->eid;
121232
addr.smctp_type = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
122-
addr.smctp_tag = MCTP_TAG_OWNER;
233+
addr.smctp_tag = tag;
123234

124235
i = 0;
125236
req_iov[i].iov_base = ((__u8 *)req->hdr) + 1;
@@ -147,59 +258,109 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
147258
if (len < 0) {
148259
nvme_msg(ep->root, LOG_ERR,
149260
"Failure sending MCTP message: %m\n");
150-
return len;
261+
rc = len;
262+
goto out;
151263
}
152264

153265
resp_iov[0].iov_base = ((__u8 *)resp->hdr) + 1;
154266
resp_iov[0].iov_len = resp->hdr_len - 1;
155267

156-
/* we use a temporary buffer to receive the response, and then
157-
* split into data & mic. This avoids having to re-arrange response
158-
* data on a recv that was shorter than expected */
159-
rspbuf = malloc(resp->data_len + sizeof(mic));
160-
if (!rspbuf)
161-
return -ENOMEM;
268+
resp_iov[1].iov_base = ((__u8 *)resp->data);
269+
resp_iov[1].iov_len = resp->data_len;
162270

163-
resp_iov[1].iov_base = rspbuf;
164-
resp_iov[1].iov_len = resp->data_len + sizeof(mic);
271+
resp_iov[2].iov_base = &mic;
272+
resp_iov[2].iov_len = sizeof(mic);
165273

166274
memset(&resp_msg, 0, sizeof(resp_msg));
167275
resp_msg.msg_name = &addr;
168276
resp_msg.msg_namelen = sizeof(addr);
169277
resp_msg.msg_iov = resp_iov;
170-
resp_msg.msg_iovlen = 2;
278+
resp_msg.msg_iovlen = 3;
171279

280+
retry:
281+
rc = -1;
172282
len = ops.recvmsg(mctp->sd, &resp_msg, 0);
173283

174284
if (len < 0) {
175285
nvme_msg(ep->root, LOG_ERR,
176286
"Failure receiving MCTP message: %m\n");
177-
free(rspbuf);
178-
return len;
287+
goto out;
288+
}
289+
290+
291+
if (len == 0) {
292+
nvme_msg(ep->root, LOG_WARNING, "No data from MCTP endpoint\n");
293+
goto out;
179294
}
180295

181-
if (len < resp->hdr_len + sizeof(mic) - 1) {
296+
/* Re-add the type byte, so we can work on aligned lengths from here */
297+
resp->hdr->type = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
298+
len += 1;
299+
300+
/* The smallest response data is 8 bytes: generic 4-byte message header
301+
* plus four bytes of error data (excluding MIC). Ensure we have enough.
302+
*/
303+
if (len < 8 + sizeof(mic)) {
182304
nvme_msg(ep->root, LOG_ERR,
183305
"Invalid MCTP response: too short (%zd bytes, needed %zd)\n",
184-
len, resp->hdr_len + sizeof(mic) - 1);
185-
free(rspbuf);
186-
return -EIO;
306+
len, 8 + sizeof(mic));
307+
goto out;
187308
}
188-
resp->hdr->type = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
189309

190-
len -= resp->hdr_len - 1;
310+
/* We can't have header/payload data that isn't a multiple of 4 bytes */
311+
if (len & 0x3) {
312+
nvme_msg(ep->root, LOG_WARNING,
313+
"Response message has unaligned length (%zd)!\n",
314+
len);
315+
goto out;
316+
}
191317

192-
memcpy(&mic, rspbuf + len - sizeof(mic), sizeof(mic));
193-
len -= sizeof(mic);
318+
/* Check for a More Processing Required response. This is a slight
319+
* layering violation, as we're pre-checking the MIC and inspecting
320+
* header fields. However, we need to do this in the transport in order
321+
* to keep the tag allocated and retry the recvmsg
322+
*/
323+
if (nvme_mi_mctp_resp_is_mpr(resp, len)) {
324+
nvme_msg(ep->root, LOG_DEBUG,
325+
"Received More Processing Required, waiting for response\n");
326+
/* TODO: when we implement timeouts, inspect the MPR response
327+
* for the estimated completion time. */
328+
goto retry;
329+
}
194330

195-
memcpy(resp->data, rspbuf, len);
196-
resp->data_len = len;
331+
/* If we have a shorter than expected response, we need to find the
332+
* MIC and the correct split between header & data. We know that the
333+
* split is 4-byte aligned, so the MIC will be entirely within one
334+
* of the iovecs.
335+
*/
336+
if (len == resp->hdr_len + resp->data_len + sizeof(mic)) {
337+
/* Common case: expected data length. Header, data and MIC
338+
* are already laid-out correctly. Nothing to do. */
339+
340+
} else if (len < resp->hdr_len + sizeof(mic)) {
341+
/* Response is smaller than the expected header. MIC is
342+
* somewhere in the header buf */
343+
resp->hdr_len = len - sizeof(mic);
344+
resp->data_len = 0;
345+
memcpy(&mic, ((uint8_t *)resp->hdr) + resp->hdr_len,
346+
sizeof(mic));
197347

198-
free(rspbuf);
348+
} else {
349+
/* We have a full header, but data is truncated - possibly
350+
* zero bytes. MIC is somewhere in the data buf */
351+
resp->data_len = len - resp->hdr_len - sizeof(mic);
352+
memcpy(&mic, ((uint8_t *)resp->data) + resp->data_len,
353+
sizeof(mic));
354+
}
199355

200356
resp->mic = le32_to_cpu(mic);
201357

202-
return 0;
358+
rc = 0;
359+
360+
out:
361+
nvme_mi_mctp_tag_drop(ep, tag);
362+
363+
return rc;
203364
}
204365

205366
static void nvme_mi_mctp_close(struct nvme_mi_ep *ep)

0 commit comments

Comments
 (0)