Skip to content

Commit ac345c5

Browse files
committed
Merge branch 'mptcp-various-rare-sending-issues'
Matthieu Baerts says: ==================== mptcp: various rare sending issues Here are various fixes from Paolo, addressing very occasional issues on the sending side: - Patch 1: drop an optimisation that could lead to a timeout in case of race conditions. A fix for up to v5.11. - Patch 2: fix stream corruption under very specific conditions. A fix for up to v5.13. - Patch 3: restore MPTCP-level zero window probe after a recent fix. A fix for up to v5.16. - Patch 4: new MIB counter to track MPTCP-level zero window probes, to help catch issues similar to the one fixed by the previous patch. ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 00764aa + fe11dfa commit ac345c5

4 files changed

Lines changed: 39 additions & 22 deletions

File tree

net/mptcp/mib.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
8585
SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
8686
SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK),
8787
SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
88+
SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE),
8889
};
8990

9091
/* mptcp_mib_alloc - allocate percpu mib counters

net/mptcp/mib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ enum linux_mptcp_mib_field {
8888
MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */
8989
MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */
9090
MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */
91+
MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */
9192
__MPTCP_MIB_MAX
9293
};
9394

net/mptcp/protocol.c

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ static void __mptcp_clean_una(struct sock *sk)
10071007
if (WARN_ON_ONCE(!msk->recovery))
10081008
break;
10091009

1010-
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
1010+
msk->first_pending = mptcp_send_next(sk);
10111011
}
10121012

10131013
dfrag_clear(sk, dfrag);
@@ -1299,7 +1299,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
12991299
if (copy == 0) {
13001300
u64 snd_una = READ_ONCE(msk->snd_una);
13011301

1302-
if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
1302+
/* No need for zero probe if there are any data pending
1303+
* either at the msk or ssk level; skb is the current write
1304+
* queue tail and can be empty at this point.
1305+
*/
1306+
if (snd_una != msk->snd_nxt || skb->len ||
1307+
skb != tcp_send_head(ssk)) {
13031308
tcp_remove_empty_skb(ssk);
13041309
return 0;
13051310
}
@@ -1350,6 +1355,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
13501355
mpext->dsn64);
13511356

13521357
if (zero_window_probe) {
1358+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE);
13531359
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
13541360
mpext->frozen = 1;
13551361
if (READ_ONCE(msk->csum_enabled))
@@ -1552,7 +1558,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
15521558

15531559
mptcp_update_post_push(msk, dfrag, ret);
15541560
}
1555-
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
1561+
msk->first_pending = mptcp_send_next(sk);
15561562

15571563
if (msk->snd_burst <= 0 ||
15581564
!sk_stream_memory_free(ssk) ||
@@ -1912,7 +1918,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
19121918
get_page(dfrag->page);
19131919
list_add_tail(&dfrag->list, &msk->rtx_queue);
19141920
if (!msk->first_pending)
1915-
WRITE_ONCE(msk->first_pending, dfrag);
1921+
msk->first_pending = dfrag;
19161922
}
19171923
pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk,
19181924
dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
@@ -1945,22 +1951,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
19451951

19461952
static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
19471953

1948-
static int __mptcp_recvmsg_mskq(struct sock *sk,
1949-
struct msghdr *msg,
1950-
size_t len, int flags,
1954+
static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
1955+
size_t len, int flags, int copied_total,
19511956
struct scm_timestamping_internal *tss,
19521957
int *cmsg_flags)
19531958
{
19541959
struct mptcp_sock *msk = mptcp_sk(sk);
19551960
struct sk_buff *skb, *tmp;
1961+
int total_data_len = 0;
19561962
int copied = 0;
19571963

19581964
skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
1959-
u32 offset = MPTCP_SKB_CB(skb)->offset;
1965+
u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
19601966
u32 data_len = skb->len - offset;
1961-
u32 count = min_t(size_t, len - copied, data_len);
1967+
u32 count;
19621968
int err;
19631969

1970+
if (flags & MSG_PEEK) {
1971+
/* skip already peeked skbs */
1972+
if (total_data_len + data_len <= copied_total) {
1973+
total_data_len += data_len;
1974+
continue;
1975+
}
1976+
1977+
/* skip the already peeked data in the current skb */
1978+
delta = copied_total - total_data_len;
1979+
offset += delta;
1980+
data_len -= delta;
1981+
}
1982+
1983+
count = min_t(size_t, len - copied, data_len);
19641984
if (!(flags & MSG_TRUNC)) {
19651985
err = skb_copy_datagram_msg(skb, offset, msg, count);
19661986
if (unlikely(err < 0)) {
@@ -1977,24 +1997,21 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
19771997

19781998
copied += count;
19791999

1980-
if (count < data_len) {
1981-
if (!(flags & MSG_PEEK)) {
2000+
if (!(flags & MSG_PEEK)) {
2001+
msk->bytes_consumed += count;
2002+
if (count < data_len) {
19822003
MPTCP_SKB_CB(skb)->offset += count;
19832004
MPTCP_SKB_CB(skb)->map_seq += count;
1984-
msk->bytes_consumed += count;
2005+
break;
19852006
}
1986-
break;
1987-
}
19882007

1989-
if (!(flags & MSG_PEEK)) {
19902008
/* avoid the indirect call, we know the destructor is sock_rfree */
19912009
skb->destructor = NULL;
19922010
skb->sk = NULL;
19932011
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
19942012
sk_mem_uncharge(sk, skb->truesize);
19952013
__skb_unlink(skb, &sk->sk_receive_queue);
19962014
skb_attempt_defer_free(skb);
1997-
msk->bytes_consumed += count;
19982015
}
19992016

20002017
if (copied >= len)
@@ -2191,7 +2208,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
21912208
while (copied < len) {
21922209
int err, bytes_read;
21932210

2194-
bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
2211+
bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
2212+
copied, &tss, &cmsg_flags);
21952213
if (unlikely(bytes_read < 0)) {
21962214
if (!copied)
21972215
copied = bytes_read;
@@ -2882,7 +2900,7 @@ static void __mptcp_clear_xmit(struct sock *sk)
28822900
struct mptcp_sock *msk = mptcp_sk(sk);
28832901
struct mptcp_data_frag *dtmp, *dfrag;
28842902

2885-
WRITE_ONCE(msk->first_pending, NULL);
2903+
msk->first_pending = NULL;
28862904
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
28872905
dfrag_clear(sk, dfrag);
28882906
}
@@ -3422,9 +3440,6 @@ void __mptcp_data_acked(struct sock *sk)
34223440

34233441
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
34243442
{
3425-
if (!mptcp_send_head(sk))
3426-
return;
3427-
34283443
if (!sock_owned_by_user(sk))
34293444
__mptcp_subflow_push_pending(sk, ssk, false);
34303445
else

net/mptcp/protocol.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
414414
{
415415
const struct mptcp_sock *msk = mptcp_sk(sk);
416416

417-
return READ_ONCE(msk->first_pending);
417+
return msk->first_pending;
418418
}
419419

420420
static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)

0 commit comments

Comments
 (0)