Skip to content

Commit 5031f01

Browse files
committed
mi-mctp: Add support for More Processing Required responses
This change allows the mi-mctp transport to receive "more processing required" responses from MI commands. When we detect an MPR response, we keep listening for a subsequent message containing the actual response. The NVMe-MI spec allows for only one MPR response per request. To do this, we need to perform explicit tag control for MCTP messaging; we cannot use the kernel's default tag handling, which would expire the tag value once we've received the first (MPR) response. Instead, we use the tag control ioctl()s to allocate tags, and expire them once we receive the final response. Signed-off-by: Jeremy Kerr <[email protected]>
1 parent d311be8 commit 5031f01

4 files changed

Lines changed: 282 additions & 8 deletions

File tree

src/nvme/mi-mctp.c

Lines changed: 142 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include <errno.h>
10+
#include <stdbool.h>
1011
#include <stdio.h>
1112
#include <stdlib.h>
1213
#include <unistd.h>
@@ -84,10 +85,16 @@ struct nvme_mi_transport_mctp {
8485
int sd;
8586
};
8687

88+
/*
 * Default tag-control operation: hands the request and its control
 * structure directly to ioctl() on the given socket descriptor.
 *
 * Returns the ioctl() result unchanged.
 */
static int ioctl_tag(int sd, unsigned long req, struct mctp_ioc_tag_ctl *ctl)
{
	int rc;

	rc = ioctl(sd, req, ctl);

	return rc;
}
92+
8793
static struct __mi_mctp_socket_ops ops = {
8894
socket,
8995
sendmsg,
9096
recvmsg,
97+
ioctl_tag,
9198
};
9299

93100
void __nvme_mi_mctp_set_ops(const struct __mi_mctp_socket_ops *newops)
@@ -96,6 +103,105 @@ void __nvme_mi_mctp_set_ops(const struct __mi_mctp_socket_ops *newops)
96103
}
97104
static const struct nvme_mi_transport nvme_mi_transport_mctp;
98105

106+
#ifdef SIOCMCTPALLOCTAG
107+
static __u8 nvme_mi_mctp_tag_alloc(struct nvme_mi_ep *ep)
108+
{
109+
struct nvme_mi_transport_mctp *mctp;
110+
struct mctp_ioc_tag_ctl ctl = { 0 };
111+
static bool logged;
112+
int rc;
113+
114+
mctp = ep->transport_data;
115+
116+
ctl.peer_addr = mctp->eid;
117+
118+
errno = 0;
119+
rc = ops.ioctl_tag(mctp->sd, SIOCMCTPALLOCTAG, &ctl);
120+
if (rc) {
121+
if (!logged) {
122+
/* not necessarily fatal, just means we can't handle
123+
* "more processing required" messages */
124+
nvme_msg(ep->root, LOG_INFO,
125+
"System does not support explicit tag allocation\n");
126+
logged = true;
127+
}
128+
return MCTP_TAG_OWNER;
129+
}
130+
131+
return ctl.tag;
132+
}
133+
134+
/*
 * Release a tag through the SIOCMCTPDROPTAG ioctl.
 *
 * Tags without the MCTP_TAG_PREALLOC bit set are left alone. The result of
 * the ioctl is not checked.
 */
static void nvme_mi_mctp_tag_drop(struct nvme_mi_ep *ep, __u8 tag)
{
	struct nvme_mi_transport_mctp *mctp = ep->transport_data;
	struct mctp_ioc_tag_ctl ctl = { 0 };

	if (tag & MCTP_TAG_PREALLOC) {
		ctl.peer_addr = mctp->eid;
		ctl.tag = tag;

		ops.ioctl_tag(mctp->sd, SIOCMCTPDROPTAG, &ctl);
	}
}
149+
150+
#else /* !defined SIOCMCTPALLOCTAG */
151+
152+
static __u8 nvme_mi_mctp_tag_alloc(struct nvme_mi_ep *ep)
153+
{
154+
static bool logged;
155+
if (!logged) {
156+
nvme_msg(ep->root, LOG_INFO,
157+
"Build does not support explicit tag allocation\n");
158+
logged = true;
159+
}
160+
return MCTP_TAG_OWNER;
161+
}
162+
163+
/*
 * Variant for builds where the tag-control ioctls are not defined:
 * intentionally does nothing.
 */
static void nvme_mi_mctp_tag_drop(struct nvme_mi_ep *ep, __u8 tag)
{
	(void)ep;
	(void)tag;
}
166+
167+
#endif /* !defined SIOCMCTPALLOCTAG */
168+
169+
static bool nvme_mi_mctp_resp_is_mpr(struct nvme_mi_resp *resp, size_t len)
170+
{
171+
struct nvme_mi_msg_resp *msg;
172+
__le32 mic;
173+
__u32 crc;
174+
175+
if (len != sizeof(*msg) + sizeof(mic))
176+
return false;
177+
178+
msg = (struct nvme_mi_msg_resp *)resp->hdr;
179+
180+
if (msg->status != NVME_MI_RESP_MPR)
181+
return false;
182+
183+
/* We can't use verify_resp_mic here, as the response structure has
184+
* not been laid-out properly in resp yet (this is deferred until
185+
* we have the actual response).
186+
*
187+
* We know the data is a fixed size, and linear in the hdr buf, so
188+
* calculation is fairly simple. We do need to find the MIC data
189+
* though, which could either be in the header buf (if the original
190+
* header was larger than the minimal header message), or the start of
191+
* the data buf (otherwise).
192+
*/
193+
if (resp->hdr_len > sizeof(*msg))
194+
mic = *(__le32 *)(msg + 1);
195+
else
196+
mic = *(__le32 *)(resp->data);
197+
198+
crc = ~nvme_mi_crc32_update(0xffffffff, msg, sizeof(*msg));
199+
if (le32_to_cpu(mic) != crc)
200+
return false;
201+
202+
return true;
203+
}
204+
99205
static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
100206
struct nvme_mi_req *req,
101207
struct nvme_mi_resp *resp)
@@ -106,19 +212,25 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
106212
struct sockaddr_mctp addr;
107213
ssize_t len;
108214
__le32 mic;
109-
int i;
215+
int i, rc;
216+
__u8 tag;
110217

111218
if (ep->transport != &nvme_mi_transport_mctp)
112219
return -EINVAL;
113220

221+
/* we need enough space for at least a generic (/error) response */
222+
if (resp->hdr_len < sizeof(struct nvme_mi_msg_resp))
223+
return -EINVAL;
224+
114225
mctp = ep->transport_data;
226+
tag = nvme_mi_mctp_tag_alloc(ep);
115227

116228
memset(&addr, 0, sizeof(addr));
117229
addr.smctp_family = AF_MCTP;
118230
addr.smctp_network = mctp->net;
119231
addr.smctp_addr.s_addr = mctp->eid;
120232
addr.smctp_type = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
121-
addr.smctp_tag = MCTP_TAG_OWNER;
233+
addr.smctp_tag = tag;
122234

123235
i = 0;
124236
req_iov[i].iov_base = ((__u8 *)req->hdr) + 1;
@@ -146,7 +258,8 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
146258
if (len < 0) {
147259
nvme_msg(ep->root, LOG_ERR,
148260
"Failure sending MCTP message: %m\n");
149-
return len;
261+
rc = len;
262+
goto out;
150263
}
151264

152265
resp_iov[0].iov_base = ((__u8 *)resp->hdr) + 1;
@@ -164,17 +277,20 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
164277
resp_msg.msg_iov = resp_iov;
165278
resp_msg.msg_iovlen = 3;
166279

280+
retry:
281+
rc = -1;
167282
len = ops.recvmsg(mctp->sd, &resp_msg, 0);
168283

169284
if (len < 0) {
170285
nvme_msg(ep->root, LOG_ERR,
171286
"Failure receiving MCTP message: %m\n");
172-
return len;
287+
goto out;
173288
}
174289

290+
175291
if (len == 0) {
176292
nvme_msg(ep->root, LOG_WARNING, "No data from MCTP endpoint\n");
177-
return -1;
293+
goto out;
178294
}
179295

180296
/* Re-add the type byte, so we can work on aligned lengths from here */
@@ -188,15 +304,28 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
188304
nvme_msg(ep->root, LOG_ERR,
189305
"Invalid MCTP response: too short (%zd bytes, needed %zd)\n",
190306
len, 8 + sizeof(mic));
191-
return -EIO;
307+
goto out;
192308
}
193309

194310
/* We can't have header/payload data that isn't a multiple of 4 bytes */
195311
if (len & 0x3) {
196312
nvme_msg(ep->root, LOG_WARNING,
197313
"Response message has unaligned length (%zd)!\n",
198314
len);
199-
return -EIO;
315+
goto out;
316+
}
317+
318+
/* Check for a More Processing Required response. This is a slight
319+
* layering violation, as we're pre-checking the MIC and inspecting
320+
* header fields. However, we need to do this in the transport in order
321+
* to keep the tag allocated and retry the recvmsg
322+
*/
323+
if (nvme_mi_mctp_resp_is_mpr(resp, len)) {
324+
nvme_msg(ep->root, LOG_DEBUG,
325+
"Received More Processing Required, waiting for response\n");
326+
/* TODO: when we implement timeouts, inspect the MPR response
327+
* for the estimated completion time. */
328+
goto retry;
200329
}
201330

202331
/* If we have a shorter than expected response, we need to find the
@@ -226,7 +355,12 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
226355

227356
resp->mic = le32_to_cpu(mic);
228357

229-
return 0;
358+
rc = 0;
359+
360+
out:
361+
nvme_mi_mctp_tag_drop(ep, tag);
362+
363+
return rc;
230364
}
231365

232366
static void nvme_mi_mctp_close(struct nvme_mi_ep *ep)

src/nvme/mi.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,28 @@ enum nvme_mi_ror {
106106
NVME_MI_ROR_RSP = 1,
107107
};
108108

109+
/**
 * enum nvme_mi_resp_status - values for the response status field
 * @NVME_MI_RESP_SUCCESS: success
 * @NVME_MI_RESP_MPR: More Processing Required
 * @NVME_MI_RESP_INTERNAL_ERR: Internal Error
 * @NVME_MI_RESP_INVALID_OPCODE: Invalid command opcode
 * @NVME_MI_RESP_INVALID_PARAM: Invalid command parameter
 * @NVME_MI_RESP_INVALID_CMD_SIZE: Invalid command size
 * @NVME_MI_RESP_INVALID_INPUT_SIZE: Invalid command input data size
 * @NVME_MI_RESP_ACCESS_DENIED: Access Denied
 */
enum nvme_mi_resp_status {
	NVME_MI_RESP_SUCCESS		= 0x00,
	NVME_MI_RESP_MPR		= 0x01,
	NVME_MI_RESP_INTERNAL_ERR	= 0x02,
	NVME_MI_RESP_INVALID_OPCODE	= 0x03,
	NVME_MI_RESP_INVALID_PARAM	= 0x04,
	NVME_MI_RESP_INVALID_CMD_SIZE	= 0x05,
	NVME_MI_RESP_INVALID_INPUT_SIZE	= 0x06,
	NVME_MI_RESP_ACCESS_DENIED	= 0x07,
};
130+
109131
/**
110132
* struct nvme_mi_msg_hdr - General MI message header.
111133
* @type: MCTP message type, will always be NVME_MI_MSGTYPE_NVME
@@ -123,6 +145,21 @@ struct nvme_mi_msg_hdr {
123145
__u8 rsvd0;
124146
} __attribute__((packed));
125147

148+
/**
149+
* struct nvme_mi_msg_resp - Generic response type.
150+
* @hdr: the general request/response message header
151+
* @status: response status value (see &enum nvme_mi_resp_status)
152+
* @rsvd0: reserved data, may be defined by specific response
153+
*
154+
* Every response will start with one of these; command-specific responses
155+
* will define parts of the reserved data, and may add further fields.
156+
*/
157+
struct nvme_mi_msg_resp {
158+
struct nvme_mi_msg_hdr hdr;
159+
__u8 status;
160+
__u8 rsvd0[3];
161+
};
162+
126163
/**
127164
* enum nvme_mi_mi_opcode - Operation code for supported NVMe-MI commands.
128165
* @nvme_mi_mi_opcode_mi_data_read: Read NVMe-MI Data Structure

src/nvme/private.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,12 @@ __u32 nvme_mi_crc32_update(__u32 crc, void *data, size_t len);
208208
/* we have a facility to mock MCTP socket operations in the mi-mctp transport,
209209
* using this ops type. This should only be used for test, and isn't exposed
210210
* in the shared lib */;
211+
/* only used through a pointer here, so a forward declaration is enough */
struct mctp_ioc_tag_ctl;

/*
 * Table of the socket-level calls made by the mi-mctp transport. The first
 * three members have the same signatures as socket(), sendmsg() and
 * recvmsg(); ioctl_tag performs a tag-control request on a socket.
 */
struct __mi_mctp_socket_ops {
	int (*socket)(int domain, int type, int protocol);
	ssize_t (*sendmsg)(int sd, const struct msghdr *msg, int flags);
	ssize_t (*recvmsg)(int sd, struct msghdr *msg, int flags);
	int (*ioctl_tag)(int sd, unsigned long req,
			 struct mctp_ioc_tag_ctl *ctl);
};
216218
void __nvme_mi_mctp_set_ops(const struct __mi_mctp_socket_ops *newops);
217219

0 commit comments

Comments
 (0)