git.baikalelectronics.ru Git - kernel.git/commitdiff
tcp: Only init congestion control if not initialized already
author: Neal Cardwell <ncardwell@google.com>
Thu, 10 Sep 2020 19:35:32 +0000 (15:35 -0400)
committer: Alexei Starovoitov <ast@kernel.org>
Fri, 11 Sep 2020 03:53:01 +0000 (20:53 -0700)
Change tcp_init_transfer() to only initialize congestion control if it
has not been initialized already.

With this new approach, we can arrange things so that if the EBPF code
sets the congestion control by calling setsockopt(TCP_CONGESTION) then
tcp_init_transfer() will not re-initialize the CC module.

This is an approach that has the following beneficial properties:

(1) This allows CC module customizations made by the EBPF called in
    tcp_init_transfer() to persist, and not be wiped out by a later
    call to tcp_init_congestion_control() in tcp_init_transfer().

(2) Does not flip the order of EBPF and CC init, to avoid causing bugs
    for existing code upstream that depends on the current order.

(3) Does not cause 2 initializations for CC in the case where the
    EBPF called in tcp_init_transfer() wants to set the CC to a new CC
    algorithm.

(4) Allows follow-on simplifications to the code in net/core/filter.c
    and net/ipv4/tcp_cong.c, which currently both have some complexity
    to special-case CC initialization to avoid double CC
    initialization if EBPF sets the CC.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Kevin Yang <yyd@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
include/net/inet_connection_sock.h
net/ipv4/tcp.c
net/ipv4/tcp_cong.c
net/ipv4/tcp_input.c

index c738abeb3265c5ff52594546c55df35e6caf8096..dc763ca9413cc9c6279a59f9d1776cf2dbb1e853 100644 (file)
@@ -96,7 +96,8 @@ struct inet_connection_sock {
        void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
        struct hlist_node         icsk_listen_portaddr_node;
        unsigned int              (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
-       __u8                      icsk_ca_state:6,
+       __u8                      icsk_ca_state:5,
+                                 icsk_ca_initialized:1,
                                  icsk_ca_setsockopt:1,
                                  icsk_ca_dst_locked:1;
        __u8                      icsk_retransmits;
index 57a5688755391de9e7651af0a8af8b212a34ebaf..7360d3db2b6161280232863fb55535c09e4f90ed 100644 (file)
@@ -2698,6 +2698,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        if (icsk->icsk_ca_ops->release)
                icsk->icsk_ca_ops->release(sk);
        memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+       icsk->icsk_ca_initialized = 0;
        tcp_set_ca_state(sk, TCP_CA_Open);
        tp->is_sack_reneg = 0;
        tcp_clear_retrans(tp);
index 62878cf26d9cc5c0ae44d5ecdadd0b7a5acf5365..d18d7a1ce4ce72cc56cf614c88d7353347a922ea 100644 (file)
@@ -176,7 +176,7 @@ void tcp_assign_congestion_control(struct sock *sk)
 
 void tcp_init_congestion_control(struct sock *sk)
 {
-       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
 
        tcp_sk(sk)->prior_ssthresh = 0;
        if (icsk->icsk_ca_ops->init)
@@ -185,6 +185,7 @@ void tcp_init_congestion_control(struct sock *sk)
                INET_ECN_xmit(sk);
        else
                INET_ECN_dontxmit(sk);
+       icsk->icsk_ca_initialized = 1;
 }
 
 static void tcp_reinit_congestion_control(struct sock *sk,
index 4337841faeff9723ecb755026069dfe4b63a384f..0e5ac0d33fd3f674a0436b8b2a0c7325fa6fa89d 100644 (file)
@@ -5894,8 +5894,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
                tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
        tp->snd_cwnd_stamp = tcp_jiffies32;
 
+       icsk->icsk_ca_initialized = 0;
        bpf_skops_established(sk, bpf_op, skb);
-       tcp_init_congestion_control(sk);
+       if (!icsk->icsk_ca_initialized)
+               tcp_init_congestion_control(sk);
        tcp_init_buffer_space(sk);
 }