git.baikalelectronics.ru Git - kernel.git/commitdiff
tcp: Set ECT0 bit in tos/tclass for synack when BPF needs ECN
author Alexander Duyck <alexanderduyck@fb.com>
Sat, 21 Nov 2020 03:47:44 +0000 (19:47 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 24 Nov 2020 22:12:55 +0000 (14:12 -0800)
When a BPF program is used to select between TCP congestion control
algorithms, some of which use ECN and some of which do not, there is a case
where the synack was being sent without the ECT0 bit set. A bit of research
found that this was due to the final socket being configured to use dctcp
while the listener socket stayed on cubic.

To reproduce it, all that is needed is to monitor TCP traffic while running
the sample bpf program "samples/bpf/tcp_cong_kern.c". What is observed,
assuming the tcp_dctcp module is loaded or compiled in and the traffic
matches the rules in the sample file, is that the ECT0 bit is set for all
frames with the exception of the synack.

To address that, it is necessary to make one additional call to
tcp_bpf_ca_needs_ecn using the request socket and then use the output of
that to set the ECT0 bit in the tos/tclass of the packet.

Fixes: a4161d089e0a ("bpf: Add support for changing congestion control")
Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://lore.kernel.org/r/160593039663.2604.1374502006916871573.stgit@localhost.localdomain
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/tcp_ipv4.c
net/ipv6/tcp_ipv6.c

index c95fcdfeed42909d7ba88cd2dc5570e95d0e4651..8391aa29e7a41ecb06c3feed13e038b827a7ab11 100644 (file)
@@ -980,13 +980,17 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 
        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
 
-       tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
-                       tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
-                       inet_sk(sk)->tos;
-
        if (skb) {
                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
+               tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+                               tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
+                               inet_sk(sk)->tos;
+
+               if (!INET_ECN_is_capable(tos) &&
+                   tcp_bpf_ca_needs_ecn((struct sock *)req))
+                       tos |= INET_ECN_ECT_0;
+
                rcu_read_lock();
                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
                                            ireq->ir_rmt_addr,
index d2502911b7fac5c6864aedebb37616b7873e1753..992cbf3eb9e38b3c5318d2aa5b7dc49e08db7439 100644 (file)
@@ -527,11 +527,16 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
-               rcu_read_lock();
-               opt = ireq->ipv6_opt;
                tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
                                tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
                                np->tclass;
+
+               if (!INET_ECN_is_capable(tclass) &&
+                   tcp_bpf_ca_needs_ecn((struct sock *)req))
+                       tclass |= INET_ECN_ECT_0;
+
+               rcu_read_lock();
+               opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,