git.baikalelectronics.ru Git - kernel.git/commitdiff
ipv6: gro: flush instead of assuming different flows on hop_limit mismatch
author: Jakub Kicinski <kuba@kernel.org>
Tue, 25 Jan 2022 04:44:44 +0000 (20:44 -0800)
committer: David S. Miller <davem@davemloft.net>
Tue, 25 Jan 2022 13:05:11 +0000 (13:05 +0000)
IPv6 GRO considers packets to belong to different flows when their
hop_limit is different. This seems counter-intuitive: the flow is
the same. hop_limit may vary because of various bugs or hacks, but
that doesn't mean it's okay for GRO to reorder packets.

Practical impact of this problem on overall TCP performance
is unclear, but TCP itself detects this reordering and bumps
TCPSACKReorder resulting in user complaints.

Eric warns that there may be performance regressions in setups
which do packet spraying across links with similar RTT but different
hop count. To be safe let's target -next and not treat this
as a fix. If the packet spraying is using flow label there should
be no difference in behavior as flow label is checked first.

Note that the existing code plays an easy-to-miss trick: it casts the
address of nexthdr to a u16 pointer and thereby compares nexthdr and
hop_limit in one go.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv6/ip6_offload.c

index b29e9ba5e1136a8f34c1dbf636052c5d5b5cc5fb..d37a79a8554e92a1dcaa6fd023cafe2114841ece 100644 (file)
@@ -249,7 +249,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
                 if ((first_word & htonl(0xF00FFFFF)) ||
                     !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
                     !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
-                    *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+                    iph->nexthdr != iph2->nexthdr) {
 not_same_flow:
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
@@ -260,7 +260,8 @@ not_same_flow:
                                goto not_same_flow;
                }
                /* flush if Traffic Class fields are different */
-               NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+               NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+                       (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
                NAPI_GRO_CB(p)->flush |= flush;
 
                /* If the previous IP ID value was based on an atomic