77 */
88
99#include <errno.h>
10+ #include <stdbool.h>
1011#include <stdio.h>
1112#include <stdlib.h>
1213#include <unistd.h>
@@ -84,10 +85,16 @@ struct nvme_mi_transport_mctp {
8485 int sd ;
8586};
8687
/*
 * Default tag-control socket operation: forwards the request straight to
 * ioctl() on the given socket descriptor.
 *
 * @sd:  socket descriptor to issue the ioctl on
 * @req: ioctl request number
 * @ctl: tag control argument, passed through unmodified
 *
 * Returns whatever ioctl() returns.
 */
static int ioctl_tag(int sd, unsigned long req, struct mctp_ioc_tag_ctl *ctl)
{
	int rc;

	rc = ioctl(sd, req, ctl);

	return rc;
}
92+
8793static struct __mi_mctp_socket_ops ops = {
8894 socket ,
8995 sendmsg ,
9096 recvmsg ,
97+ ioctl_tag ,
9198};
9299
93100void __nvme_mi_mctp_set_ops (const struct __mi_mctp_socket_ops * newops )
@@ -96,30 +103,134 @@ void __nvme_mi_mctp_set_ops(const struct __mi_mctp_socket_ops *newops)
96103}
97104static const struct nvme_mi_transport nvme_mi_transport_mctp ;
98105
106+ #ifdef SIOCMCTPALLOCTAG
107+ static __u8 nvme_mi_mctp_tag_alloc (struct nvme_mi_ep * ep )
108+ {
109+ struct nvme_mi_transport_mctp * mctp ;
110+ struct mctp_ioc_tag_ctl ctl = { 0 };
111+ static bool logged ;
112+ int rc ;
113+
114+ mctp = ep -> transport_data ;
115+
116+ ctl .peer_addr = mctp -> eid ;
117+
118+ errno = 0 ;
119+ rc = ops .ioctl_tag (mctp -> sd , SIOCMCTPALLOCTAG , & ctl );
120+ if (rc ) {
121+ if (!logged ) {
122+ /* not necessarily fatal, just means we can't handle
123+ * "more processing required" messages */
124+ nvme_msg (ep -> root , LOG_INFO ,
125+ "System does not support explicit tag allocation\n" );
126+ logged = true;
127+ }
128+ return MCTP_TAG_OWNER ;
129+ }
130+
131+ return ctl .tag ;
132+ }
133+
134+ static void nvme_mi_mctp_tag_drop (struct nvme_mi_ep * ep , __u8 tag )
135+ {
136+ struct nvme_mi_transport_mctp * mctp ;
137+ struct mctp_ioc_tag_ctl ctl = { 0 };
138+
139+ mctp = ep -> transport_data ;
140+
141+ if (!(tag & MCTP_TAG_PREALLOC ))
142+ return ;
143+
144+ ctl .peer_addr = mctp -> eid ;
145+ ctl .tag = tag ;
146+
147+ ops .ioctl_tag (mctp -> sd , SIOCMCTPDROPTAG , & ctl );
148+ }
149+
150+ #else /* !defined SIOCMCTPALLOCTAG */
151+
152+ static __u8 nvme_mi_mctp_tag_alloc (struct nvme_mi_ep * ep )
153+ {
154+ static bool logged ;
155+ if (!logged ) {
156+ nvme_msg (ep -> root , LOG_INFO ,
157+ "Build does not support explicit tag allocation\n" );
158+ logged = true;
159+ }
160+ return MCTP_TAG_OWNER ;
161+ }
162+
163+ static void nvme_mi_mctp_tag_drop (struct nvme_mi_ep * ep , __u8 tag )
164+ {
165+ }
166+
167+ #endif /* !defined SIOCMCTPALLOCTAG */
168+
169+ static bool nvme_mi_mctp_resp_is_mpr (struct nvme_mi_resp * resp , size_t len )
170+ {
171+ struct nvme_mi_msg_resp * msg ;
172+ __le32 mic ;
173+ __u32 crc ;
174+
175+ if (len != sizeof (* msg ) + sizeof (mic ))
176+ return false;
177+
178+ msg = (struct nvme_mi_msg_resp * )resp -> hdr ;
179+
180+ if (msg -> status != NVME_MI_RESP_MPR )
181+ return false;
182+
183+ /* We can't use verify_resp_mic here, as the response structure has
184+ * not been laid-out properly in resp yet (this is deferred until
185+ * we have the actual response).
186+ *
187+ * We know the data is a fixed size, and linear in the hdr buf, so
188+ * calculation is fairly simple. We do need to find the MIC data
189+ * though, which could either be in the header buf (if the original
190+ * header was larger than the minimal header message), or the start of
191+ * the data buf (otherwise).
192+ */
193+ if (resp -> hdr_len > sizeof (* msg ))
194+ mic = * (__le32 * )(msg + 1 );
195+ else
196+ mic = * (__le32 * )(resp -> data );
197+
198+ crc = ~nvme_mi_crc32_update (0xffffffff , msg , sizeof (* msg ));
199+ if (le32_to_cpu (mic ) != crc )
200+ return false;
201+
202+ return true;
203+ }
204+
99205static int nvme_mi_mctp_submit (struct nvme_mi_ep * ep ,
100206 struct nvme_mi_req * req ,
101207 struct nvme_mi_resp * resp )
102208{
103209 struct nvme_mi_transport_mctp * mctp ;
104- struct iovec req_iov [3 ], resp_iov [2 ];
210+ struct iovec req_iov [3 ], resp_iov [3 ];
105211 struct msghdr req_msg , resp_msg ;
106212 struct sockaddr_mctp addr ;
107- unsigned char * rspbuf ;
108213 ssize_t len ;
109214 __le32 mic ;
110- int i ;
215+ int i , rc ;
216+ __u8 tag ;
111217
112218 if (ep -> transport != & nvme_mi_transport_mctp )
113219 return - EINVAL ;
114220
221+ /* we need enough space for at least a generic (/error) response */
222+ if (resp -> hdr_len < sizeof (struct nvme_mi_msg_resp ))
223+ return - EINVAL ;
224+
115225 mctp = ep -> transport_data ;
226+ tag = nvme_mi_mctp_tag_alloc (ep );
116227
117228 memset (& addr , 0 , sizeof (addr ));
118229 addr .smctp_family = AF_MCTP ;
119230 addr .smctp_network = mctp -> net ;
120231 addr .smctp_addr .s_addr = mctp -> eid ;
121232 addr .smctp_type = MCTP_TYPE_NVME | MCTP_TYPE_MIC ;
122- addr .smctp_tag = MCTP_TAG_OWNER ;
233+ addr .smctp_tag = tag ;
123234
124235 i = 0 ;
125236 req_iov [i ].iov_base = ((__u8 * )req -> hdr ) + 1 ;
@@ -147,59 +258,109 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
147258 if (len < 0 ) {
148259 nvme_msg (ep -> root , LOG_ERR ,
149260 "Failure sending MCTP message: %m\n" );
150- return len ;
261+ rc = len ;
262+ goto out ;
151263 }
152264
153265 resp_iov [0 ].iov_base = ((__u8 * )resp -> hdr ) + 1 ;
154266 resp_iov [0 ].iov_len = resp -> hdr_len - 1 ;
155267
156- /* we use a temporary buffer to receive the response, and then
157- * split into data & mic. This avoids having to re-arrange response
158- * data on a recv that was shorter than expected */
159- rspbuf = malloc (resp -> data_len + sizeof (mic ));
160- if (!rspbuf )
161- return - ENOMEM ;
268+ resp_iov [1 ].iov_base = ((__u8 * )resp -> data );
269+ resp_iov [1 ].iov_len = resp -> data_len ;
162270
163- resp_iov [1 ].iov_base = rspbuf ;
164- resp_iov [1 ].iov_len = resp -> data_len + sizeof (mic );
271+ resp_iov [2 ].iov_base = & mic ;
272+ resp_iov [2 ].iov_len = sizeof (mic );
165273
166274 memset (& resp_msg , 0 , sizeof (resp_msg ));
167275 resp_msg .msg_name = & addr ;
168276 resp_msg .msg_namelen = sizeof (addr );
169277 resp_msg .msg_iov = resp_iov ;
170- resp_msg .msg_iovlen = 2 ;
278+ resp_msg .msg_iovlen = 3 ;
171279
280+ retry :
281+ rc = -1 ;
172282 len = ops .recvmsg (mctp -> sd , & resp_msg , 0 );
173283
174284 if (len < 0 ) {
175285 nvme_msg (ep -> root , LOG_ERR ,
176286 "Failure receiving MCTP message: %m\n" );
177- free (rspbuf );
178- return len ;
287+ goto out ;
288+ }
289+
290+
291+ if (len == 0 ) {
292+ nvme_msg (ep -> root , LOG_WARNING , "No data from MCTP endpoint\n" );
293+ goto out ;
179294 }
180295
181- if (len < resp -> hdr_len + sizeof (mic ) - 1 ) {
296+ /* Re-add the type byte, so we can work on aligned lengths from here */
297+ resp -> hdr -> type = MCTP_TYPE_NVME | MCTP_TYPE_MIC ;
298+ len += 1 ;
299+
300+ /* The smallest response data is 8 bytes: generic 4-byte message header
301+ * plus four bytes of error data (excluding MIC). Ensure we have enough.
302+ */
303+ if (len < 8 + sizeof (mic )) {
182304 nvme_msg (ep -> root , LOG_ERR ,
183305 "Invalid MCTP response: too short (%zd bytes, needed %zd)\n" ,
184- len , resp -> hdr_len + sizeof (mic ) - 1 );
185- free (rspbuf );
186- return - EIO ;
306+ len , 8 + sizeof (mic ));
307+ goto out ;
187308 }
188- resp -> hdr -> type = MCTP_TYPE_NVME | MCTP_TYPE_MIC ;
189309
190- len -= resp -> hdr_len - 1 ;
310+ /* We can't have header/payload data that isn't a multiple of 4 bytes */
311+ if (len & 0x3 ) {
312+ nvme_msg (ep -> root , LOG_WARNING ,
313+ "Response message has unaligned length (%zd)!\n" ,
314+ len );
315+ goto out ;
316+ }
191317
192- memcpy (& mic , rspbuf + len - sizeof (mic ), sizeof (mic ));
193- len -= sizeof (mic );
318+ /* Check for a More Processing Required response. This is a slight
319+ * layering violation, as we're pre-checking the MIC and inspecting
320+ * header fields. However, we need to do this in the transport in order
321+ * to keep the tag allocated and retry the recvmsg
322+ */
323+ if (nvme_mi_mctp_resp_is_mpr (resp , len )) {
324+ nvme_msg (ep -> root , LOG_DEBUG ,
325+ "Received More Processing Required, waiting for response\n" );
326+ /* TODO: when we implement timeouts, inspect the MPR response
327+ * for the estimated completion time. */
328+ goto retry ;
329+ }
194330
195- memcpy (resp -> data , rspbuf , len );
196- resp -> data_len = len ;
331+ /* If we have a shorter than expected response, we need to find the
332+ * MIC and the correct split between header & data. We know that the
333+ * split is 4-byte aligned, so the MIC will be entirely within one
334+ * of the iovecs.
335+ */
336+ if (len == resp -> hdr_len + resp -> data_len + sizeof (mic )) {
337+ /* Common case: expected data length. Header, data and MIC
338+ * are already laid-out correctly. Nothing to do. */
339+
340+ } else if (len < resp -> hdr_len + sizeof (mic )) {
341+ /* Response is smaller than the expected header. MIC is
342+ * somewhere in the header buf */
343+ resp -> hdr_len = len - sizeof (mic );
344+ resp -> data_len = 0 ;
345+ memcpy (& mic , ((uint8_t * )resp -> hdr ) + resp -> hdr_len ,
346+ sizeof (mic ));
197347
198- free (rspbuf );
348+ } else {
349+ /* We have a full header, but data is truncated - possibly
350+ * zero bytes. MIC is somewhere in the data buf */
351+ resp -> data_len = len - resp -> hdr_len - sizeof (mic );
352+ memcpy (& mic , ((uint8_t * )resp -> data ) + resp -> data_len ,
353+ sizeof (mic ));
354+ }
199355
200356 resp -> mic = le32_to_cpu (mic );
201357
202- return 0 ;
358+ rc = 0 ;
359+
360+ out :
361+ nvme_mi_mctp_tag_drop (ep , tag );
362+
363+ return rc ;
203364}
204365
205366static void nvme_mi_mctp_close (struct nvme_mi_ep * ep )
0 commit comments