From 5ea78a1533fa29bd7473c4bb16bbff0e1433c42b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jan 2020 16:56:25 -0800 Subject: [PATCH] mptcp: add subflow write space signalling and mptcp_poll Add new SEND_SPACE flag to indicate that a subflow has enough space to accept more data for transmission. It gets cleared at the end of mptcp_sendmsg() in case ssk has run below the free watermark. It is (re-)set from the write space callback. This allows us to use msk->flags to determine the poll mask. Co-developed-by: Peter Krystad Signed-off-by: Peter Krystad Signed-off-by: Florian Westphal Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 3 +++ 3 files changed, 57 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 71250149180b2..408efbe347534 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -176,6 +176,23 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, return ret; } +static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk) +{ + struct socket *sock; + + if (likely(sk_stream_is_writeable(ssk))) + return; + + sock = READ_ONCE(ssk->sk_socket); + + if (sock) { + clear_bit(MPTCP_SEND_SPACE, &msk->flags); + smp_mb__after_atomic(); + /* set NOSPACE only after clearing SEND_SPACE flag */ + set_bit(SOCK_NOSPACE, &sock->flags); + } +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -219,6 +236,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (copied > 0) ret = copied; + ssk_check_wmem(msk, ssk); release_sock(ssk); release_sock(sk); return ret; @@ -315,6 +333,7 @@ static int mptcp_init_sock(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); INIT_LIST_HEAD(&msk->conn_list); + __set_bit(MPTCP_SEND_SPACE, &msk->flags); return 0; } @@ -576,6 +595,13 @@ static void 
mptcp_sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } +static bool mptcp_memory_free(const struct sock *sk, int wake) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + return wake ? test_bit(MPTCP_SEND_SPACE, &msk->flags) : true; +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -591,6 +617,7 @@ static struct proto mptcp_prot = { .hash = inet_hash, .unhash = inet_unhash, .get_port = mptcp_get_port, + .stream_memory_free = mptcp_memory_free, .obj_size = sizeof(struct mptcp_sock), .no_autobind = true, }; @@ -767,8 +794,34 @@ unlock_fail: static __poll_t mptcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { + const struct mptcp_sock *msk; + struct sock *sk = sock->sk; + struct socket *ssock; __poll_t mask = 0; + msk = mptcp_sk(sk); + lock_sock(sk); + ssock = __mptcp_nmpc_socket(msk); + if (ssock) { + mask = ssock->ops->poll(file, ssock, wait); + release_sock(sk); + return mask; + } + + release_sock(sk); + sock_poll_wait(file, sock, wait); + lock_sock(sk); + + if (test_bit(MPTCP_DATA_READY, &msk->flags)) + mask = EPOLLIN | EPOLLRDNORM; + if (sk_stream_is_writeable(sk) && + test_bit(MPTCP_SEND_SPACE, &msk->flags)) + mask |= EPOLLOUT | EPOLLWRNORM; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + + release_sock(sk); + return mask; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c6d8217e24d40..59a83eb64d375 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -56,6 +56,7 @@ /* MPTCP socket flags */ #define MPTCP_DATA_READY BIT(0) +#define MPTCP_SEND_SPACE BIT(1) /* MPTCP connection sock */ struct mptcp_sock { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 528351e26371f..9fb3eb87a20fa 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -529,6 +529,9 @@ static void subflow_write_space(struct sock *sk) sk_stream_write_space(sk); if (parent && sk_stream_is_writeable(sk)) { + 
set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags); + smp_mb__after_atomic(); + /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */ sk_stream_write_space(parent); } } -- 2.39.5