@@ -1624,19 +1624,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
16241624 log_rdma_event (INFO , "free receive buffers\n" );
16251625 destroy_receive_buffers (sc );
16261626
1627- /*
1628- * For performance reasons, memory registration and deregistration
1629- * are not locked by srv_mutex. It is possible some processes are
1630- * blocked on transport srv_mutex while holding memory registration.
1631- * Release the transport srv_mutex to allow them to hit the failure
1632- * path when sending data, and then release memory registrations.
1633- */
16341627 log_rdma_event (INFO , "freeing mr list\n" );
1635- while (atomic_read (& sc -> mr_io .used .count )) {
1636- cifs_server_unlock (server );
1637- msleep (1000 );
1638- cifs_server_lock (server );
1639- }
16401628 destroy_mr_list (sc );
16411629
16421630 ib_free_cq (sc -> ib .send_cq );
@@ -2352,6 +2340,46 @@ static void smbd_mr_recovery_work(struct work_struct *work)
23522340 }
23532341}
23542342
2343+ static void smbd_mr_disable_locked (struct smbdirect_mr_io * mr )
2344+ {
2345+ struct smbdirect_socket * sc = mr -> socket ;
2346+
2347+ lockdep_assert_held (& mr -> mutex );
2348+
2349+ if (mr -> state == SMBDIRECT_MR_DISABLED )
2350+ return ;
2351+
2352+ if (mr -> mr )
2353+ ib_dereg_mr (mr -> mr );
2354+ if (mr -> sgt .nents )
2355+ ib_dma_unmap_sg (sc -> ib .dev , mr -> sgt .sgl , mr -> sgt .nents , mr -> dir );
2356+ kfree (mr -> sgt .sgl );
2357+
2358+ mr -> mr = NULL ;
2359+ mr -> sgt .sgl = NULL ;
2360+ mr -> sgt .nents = 0 ;
2361+
2362+ mr -> state = SMBDIRECT_MR_DISABLED ;
2363+ }
2364+
/*
 * kref release callback for a memory registration descriptor.
 *
 * Invoked via kref_put() with mr->mutex HELD by the caller (callers
 * use plain kref_put() instead of kref_put_mutex() for this reason).
 * Since this only runs when the last reference is dropped, no other
 * thread can be waiting on the mutex, so it is safe to unlock,
 * destroy the mutex and free the descriptor here.
 */
static void smbd_mr_free_locked(struct kref *kref)
{
	struct smbdirect_mr_io *mr =
		container_of(kref, struct smbdirect_mr_io, kref);

	lockdep_assert_held(&mr->mutex);

	/*
	 * smbd_mr_disable_locked() should already be called!
	 */
	if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
		smbd_mr_disable_locked(mr);

	/* Unlock before destroy: last ref, so no other waiters exist. */
	mutex_unlock(&mr->mutex);
	mutex_destroy(&mr->mutex);
	kfree(mr);
}
2382+
23552383static void destroy_mr_list (struct smbdirect_socket * sc )
23562384{
23572385 struct smbdirect_mr_io * mr , * tmp ;
@@ -2365,13 +2393,31 @@ static void destroy_mr_list(struct smbdirect_socket *sc)
23652393 spin_unlock_irqrestore (& sc -> mr_io .all .lock , flags );
23662394
23672395 list_for_each_entry_safe (mr , tmp , & all_list , list ) {
2368- if (mr -> mr )
2369- ib_dereg_mr (mr -> mr );
2370- if (mr -> sgt .nents )
2371- ib_dma_unmap_sg (sc -> ib .dev , mr -> sgt .sgl , mr -> sgt .nents , mr -> dir );
2372- kfree (mr -> sgt .sgl );
2396+ mutex_lock (& mr -> mutex );
2397+
2398+ smbd_mr_disable_locked (mr );
23732399 list_del (& mr -> list );
2374- kfree (mr );
2400+ mr -> socket = NULL ;
2401+
2402+ /*
2403+ * No kref_put_mutex() as it's already locked.
2404+ *
2405+ * If smbd_mr_free_locked() is called
2406+ * and the mutex is unlocked and mr is gone,
2407+ * in that case kref_put() returned 1.
2408+ *
2409+ * If kref_put() returned 0 we know that
2410+ * smbd_mr_free_locked() didn't
2411+ * run. Not by us nor by anyone else, as we
2412+ * still hold the mutex, so we need to unlock.
2413+ *
2414+ * If the mr is still registered it will
2415+ * be dangling (detached from the connection
2416+ * waiting for smbd_deregister_mr() to be
2417+ * called in order to free the memory.
2418+ */
2419+ if (!kref_put (& mr -> kref , smbd_mr_free_locked ))
2420+ mutex_unlock (& mr -> mutex );
23752421 }
23762422}
23772423
@@ -2402,6 +2448,9 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
24022448 goto kzalloc_mr_failed ;
24032449 }
24042450
2451+ kref_init (& mr -> kref );
2452+ mutex_init (& mr -> mutex );
2453+
24052454 mr -> mr = ib_alloc_mr (sc -> ib .pd ,
24062455 sc -> mr_io .type ,
24072456 sp -> max_frmr_depth );
@@ -2434,6 +2483,7 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
24342483kcalloc_sgl_failed :
24352484 ib_dereg_mr (mr -> mr );
24362485ib_alloc_mr_failed :
2486+ mutex_destroy (& mr -> mutex );
24372487 kfree (mr );
24382488kzalloc_mr_failed :
24392489 destroy_mr_list (sc );
@@ -2471,6 +2521,7 @@ static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc)
24712521 list_for_each_entry (ret , & sc -> mr_io .all .list , list ) {
24722522 if (ret -> state == SMBDIRECT_MR_READY ) {
24732523 ret -> state = SMBDIRECT_MR_REGISTERED ;
2524+ kref_get (& ret -> kref );
24742525 spin_unlock_irqrestore (& sc -> mr_io .all .lock , flags );
24752526 atomic_dec (& sc -> mr_io .ready .count );
24762527 atomic_inc (& sc -> mr_io .used .count );
@@ -2535,6 +2586,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
25352586 return NULL ;
25362587 }
25372588
2589+ mutex_lock (& mr -> mutex );
2590+
25382591 mr -> dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE ;
25392592 mr -> need_invalidate = need_invalidate ;
25402593 mr -> sgt .nents = 0 ;
@@ -2578,8 +2631,16 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
25782631 * on the next ib_post_send when we actually send I/O to remote peer
25792632 */
25802633 rc = ib_post_send (sc -> ib .qp , & reg_wr -> wr , NULL );
2581- if (!rc )
2634+ if (!rc ) {
2635+ /*
2636+ * get_mr() gave us a reference
2637+ * via kref_get(&mr->kref), we keep that and let
2638+ * the caller use smbd_deregister_mr()
2639+ * to remove it again.
2640+ */
2641+ mutex_unlock (& mr -> mutex );
25822642 return mr ;
2643+ }
25832644
25842645 log_rdma_mr (ERR , "ib_post_send failed rc=%x reg_wr->key=%x\n" ,
25852646 rc , reg_wr -> key );
@@ -2596,6 +2657,25 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
25962657
25972658 smbd_disconnect_rdma_connection (sc );
25982659
2660+ /*
2661+ * get_mr() gave us a reference
2662+ * via kref_get(&mr->kref), we need to remove it again
2663+ * on error.
2664+ *
2665+ * No kref_put_mutex() as it's already locked.
2666+ *
2667+ * If smbd_mr_free_locked() is called
2668+ * and the mutex is unlocked and mr is gone,
2669+ * in that case kref_put() returned 1.
2670+ *
2671+ * If kref_put() returned 0 we know that
2672+ * smbd_mr_free_locked() didn't
2673+ * run. Not by us nor by anyone else, as we
2674+ * still hold the mutex, so we need to unlock.
2675+ */
2676+ if (!kref_put (& mr -> kref , smbd_mr_free_locked ))
2677+ mutex_unlock (& mr -> mutex );
2678+
25992679 return NULL ;
26002680}
26012681
@@ -2624,6 +2704,15 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
26242704{
26252705 struct smbdirect_socket * sc = mr -> socket ;
26262706
2707+ mutex_lock (& mr -> mutex );
2708+ if (mr -> state == SMBDIRECT_MR_DISABLED )
2709+ goto put_kref ;
2710+
2711+ if (sc -> status != SMBDIRECT_SOCKET_CONNECTED ) {
2712+ smbd_mr_disable_locked (mr );
2713+ goto put_kref ;
2714+ }
2715+
26272716 if (mr -> need_invalidate ) {
26282717 struct ib_send_wr * wr = & mr -> inv_wr ;
26292718 int rc ;
@@ -2640,6 +2729,7 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
26402729 rc = ib_post_send (sc -> ib .qp , wr , NULL );
26412730 if (rc ) {
26422731 log_rdma_mr (ERR , "ib_post_send failed rc=%x\n" , rc );
2732+ smbd_mr_disable_locked (mr );
26432733 smbd_disconnect_rdma_connection (sc );
26442734 goto done ;
26452735 }
@@ -2671,6 +2761,24 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
26712761done :
26722762 if (atomic_dec_and_test (& sc -> mr_io .used .count ))
26732763 wake_up (& sc -> mr_io .cleanup .wait_queue );
2764+
2765+ put_kref :
2766+ /*
2767+ * No kref_put_mutex() as it's already locked.
2768+ *
2769+ * If smbd_mr_free_locked() is called
2770+ * and the mutex is unlocked and mr is gone,
2771+ * in that case kref_put() returned 1.
2772+ *
2773+ * If kref_put() returned 0 we know that
2774+ * smbd_mr_free_locked() didn't
2775+ * run. Not by us nor by anyone else, as we
2776+ * still hold the mutex, so we need to unlock
2777+ * and keep the mr in SMBDIRECT_MR_READY or
2778+ * SMBDIRECT_MR_ERROR state.
2779+ */
2780+ if (!kref_put (& mr -> kref , smbd_mr_free_locked ))
2781+ mutex_unlock (& mr -> mutex );
26742782}
26752783
26762784static bool smb_set_sge (struct smb_extract_to_rdma * rdma ,
0 commit comments