git.baikalelectronics.ru Git - kernel.git/commitdiff
mptcp: parse and emit MP_CAPABLE option according to v1 spec
author: Christoph Paasch <cpaasch@apple.com>
Wed, 22 Jan 2020 00:56:31 +0000 (16:56 -0800)
committer: David S. Miller <davem@davemloft.net>
Fri, 24 Jan 2020 12:44:08 +0000 (13:44 +0100)
This implements MP_CAPABLE options parsing and writing according
to RFC 6824 bis / RFC 8684: MPTCP v1.

The local key is sent on the SYN/ACK, and both keys are sent on the third ACK.
The MP_CAPABLE option lengths are updated accordingly. We need the skbuff to
correctly emit the above, so we pass the skbuff struct as an argument
all the way from the TCP code to the relevant MPTCP callbacks.

When processing an incoming MP_CAPABLE + data, build full-blown DSS-like
map info to simplify later processing.  On child socket creation, we
need to record the remote key, if available.

Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/tcp.h
include/net/mptcp.h
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/mptcp/options.c
net/mptcp/protocol.h
net/mptcp/subflow.c

index 0d00dad4b85dd9df1502e41d5c0350ae2ccd9974..4e2124607d325c54f572ce62c1e70581da6b0a72 100644 (file)
@@ -94,7 +94,8 @@ struct mptcp_options_received {
                data_fin:1,
                use_ack:1,
                ack64:1,
-               __unused:3;
+               mpc_map:1,
+               __unused:2;
 };
 #endif
 
index 8619c1fca741ecff4272fb48e2798d5b4711827f..27627e2d1bc2e9a1f6cab4858d54fbed1a04e845 100644 (file)
@@ -23,7 +23,8 @@ struct mptcp_ext {
                        data_fin:1,
                        use_ack:1,
                        ack64:1,
-                       __unused:3;
+                       mpc_map:1,
+                       __unused:2;
        /* one byte hole */
 };
 
@@ -50,10 +51,10 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
        return tcp_rsk(req)->is_mptcp;
 }
 
-void mptcp_parse_option(const unsigned char *ptr, int opsize,
-                       struct tcp_options_received *opt_rx);
-bool mptcp_syn_options(struct sock *sk, unsigned int *size,
-                      struct mptcp_out_options *opts);
+void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
+                       int opsize, struct tcp_options_received *opt_rx);
+bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
+                      unsigned int *size, struct mptcp_out_options *opts);
 void mptcp_rcv_synsent(struct sock *sk);
 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
                          struct mptcp_out_options *opts);
@@ -121,12 +122,14 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
        return false;
 }
 
-static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
+static inline void mptcp_parse_option(const struct sk_buff *skb,
+                                     const unsigned char *ptr, int opsize,
                                      struct tcp_options_received *opt_rx)
 {
 }
 
-static inline bool mptcp_syn_options(struct sock *sk, unsigned int *size,
+static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
+                                    unsigned int *size,
                                     struct mptcp_out_options *opts)
 {
        return false;
index 28d31f2c14223cd299d48f3920cbc6b893cff450..2f475b897c11632f81486f5a13944aad2dae98be 100644 (file)
@@ -3926,7 +3926,7 @@ void tcp_parse_options(const struct net *net,
                                break;
 #endif
                        case TCPOPT_MPTCP:
-                               mptcp_parse_option(ptr, opsize, opt_rx);
+                               mptcp_parse_option(skb, ptr, opsize, opt_rx);
                                break;
 
                        case TCPOPT_FASTOPEN:
index 5456076166dac3bfbc583e2ceeb2cfbdc2afa646..fec4b3a4b22d7f7ec571f80608b4eea11381426c 100644 (file)
@@ -685,7 +685,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
        if (sk_is_mptcp(sk)) {
                unsigned int size;
 
-               if (mptcp_syn_options(sk, &size, &opts->mptcp)) {
+               if (mptcp_syn_options(sk, skb, &size, &opts->mptcp)) {
                        opts->options |= OPTION_MPTCP;
                        remaining -= size;
                }
index 1aec742ca8e1e4f4644eca5bf142bd43af1717c0..8f82ff9a5a8e4fbbbb325146d4678a9ed91398d3 100644 (file)
@@ -14,8 +14,8 @@ static bool mptcp_cap_flag_sha256(u8 flags)
        return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
 }
 
-void mptcp_parse_option(const unsigned char *ptr, int opsize,
-                       struct tcp_options_received *opt_rx)
+void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
+                       int opsize, struct tcp_options_received *opt_rx)
 {
        struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
        u8 subtype = *ptr >> 4;
@@ -25,13 +25,29 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
 
        switch (subtype) {
        case MPTCPOPT_MP_CAPABLE:
-               if (opsize != TCPOLEN_MPTCP_MPC_SYN &&
-                   opsize != TCPOLEN_MPTCP_MPC_ACK)
+               /* strict size checking */
+               if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+                       if (skb->len > tcp_hdr(skb)->doff << 2)
+                               expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
+                       else
+                               expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
+               } else {
+                       if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
+                               expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
+                       else
+                               expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
+               }
+               if (opsize != expected_opsize)
                        break;
 
+               /* try to be gentle vs future versions on the initial syn */
                version = *ptr++ & MPTCP_VERSION_MASK;
-               if (version != MPTCP_SUPPORTED_VERSION)
+               if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
+                       if (version != MPTCP_SUPPORTED_VERSION)
+                               break;
+               } else if (version < MPTCP_SUPPORTED_VERSION) {
                        break;
+               }
 
                flags = *ptr++;
                if (!mptcp_cap_flag_sha256(flags) ||
@@ -55,23 +71,40 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
                        break;
 
                mp_opt->mp_capable = 1;
-               mp_opt->sndr_key = get_unaligned_be64(ptr);
-               ptr += 8;
-
-               if (opsize == TCPOLEN_MPTCP_MPC_ACK) {
+               if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
+                       mp_opt->sndr_key = get_unaligned_be64(ptr);
+                       ptr += 8;
+               }
+               if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
                        mp_opt->rcvr_key = get_unaligned_be64(ptr);
                        ptr += 8;
-                       pr_debug("MP_CAPABLE sndr=%llu, rcvr=%llu",
-                                mp_opt->sndr_key, mp_opt->rcvr_key);
-               } else {
-                       pr_debug("MP_CAPABLE sndr=%llu", mp_opt->sndr_key);
                }
+               if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+                       /* Section 3.1.:
+                        * "the data parameters in a MP_CAPABLE are semantically
+                        * equivalent to those in a DSS option and can be used
+                        * interchangeably."
+                        */
+                       mp_opt->dss = 1;
+                       mp_opt->use_map = 1;
+                       mp_opt->mpc_map = 1;
+                       mp_opt->data_len = get_unaligned_be16(ptr);
+                       ptr += 2;
+               }
+               pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+                        version, flags, opsize, mp_opt->sndr_key,
+                        mp_opt->rcvr_key, mp_opt->data_len);
                break;
 
        case MPTCPOPT_DSS:
                pr_debug("DSS");
                ptr++;
 
+               /* we must clear 'mpc_map' to be able to detect MP_CAPABLE
+                * map vs DSS map in mptcp_incoming_options(), and reconstruct
+                * map info accordingly
+                */
+               mp_opt->mpc_map = 0;
                flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
                mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
                mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
@@ -176,18 +209,22 @@ void mptcp_get_options(const struct sk_buff *skb,
                        if (opsize > length)
                                return; /* don't parse partial options */
                        if (opcode == TCPOPT_MPTCP)
-                               mptcp_parse_option(ptr, opsize, opt_rx);
+                               mptcp_parse_option(skb, ptr, opsize, opt_rx);
                        ptr += opsize - 2;
                        length -= opsize;
                }
        }
 }
 
-bool mptcp_syn_options(struct sock *sk, unsigned int *size,
-                      struct mptcp_out_options *opts)
+bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
+                      unsigned int *size, struct mptcp_out_options *opts)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 
+       /* we will use snd_isn to detect first pkt [re]transmission
+        * in mptcp_established_options_mp()
+        */
+       subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
        if (subflow->request_mptcp) {
                pr_debug("local_key=%llu", subflow->local_key);
                opts->suboptions = OPTION_MPTCP_MPC_SYN;
@@ -212,20 +249,52 @@ void mptcp_rcv_synsent(struct sock *sk)
        }
 }
 
-static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size,
+static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+                                        unsigned int *size,
                                         unsigned int remaining,
                                         struct mptcp_out_options *opts)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+       struct mptcp_ext *mpext;
+       unsigned int data_len;
+
+       pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow,
+                subflow->fourth_ack, subflow->snd_isn,
+                skb ? TCP_SKB_CB(skb)->seq : 0, remaining);
+
+       if (subflow->mp_capable && !subflow->fourth_ack && skb &&
+           subflow->snd_isn == TCP_SKB_CB(skb)->seq) {
+               /* When skb is not available, we better over-estimate the
+                * emitted options len. A full DSS option is longer than
+                * TCPOLEN_MPTCP_MPC_ACK_DATA, so let the caller try to fit
+                * that.
+                */
+               mpext = mptcp_get_ext(skb);
+               data_len = mpext ? mpext->data_len : 0;
 
-       if (!subflow->fourth_ack) {
+               /* we will check ext_copy.data_len in mptcp_write_options() to
+                * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
+                * TCPOLEN_MPTCP_MPC_ACK
+                */
+               opts->ext_copy.data_len = data_len;
                opts->suboptions = OPTION_MPTCP_MPC_ACK;
                opts->sndr_key = subflow->local_key;
                opts->rcvr_key = subflow->remote_key;
-               *size = TCPOLEN_MPTCP_MPC_ACK;
-               subflow->fourth_ack = 1;
-               pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
-                        subflow, subflow->local_key, subflow->remote_key);
+
+               /* Section 3.1.
+                * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
+                * packets that start the first subflow of an MPTCP connection,
+                * as well as the first packet that carries data
+                */
+               if (data_len > 0)
+                       *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
+               else
+                       *size = TCPOLEN_MPTCP_MPC_ACK;
+
+               pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
+                        subflow, subflow->local_key, subflow->remote_key,
+                        data_len);
+
                return true;
        }
        return false;
@@ -319,7 +388,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
        unsigned int opt_size = 0;
        bool ret = false;
 
-       if (mptcp_established_options_mp(sk, &opt_size, remaining, opts))
+       if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
                ret = true;
        else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
                                               opts))
@@ -371,11 +440,26 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
        memset(mpext, 0, sizeof(*mpext));
 
        if (mp_opt->use_map) {
-               mpext->data_seq = mp_opt->data_seq;
-               mpext->subflow_seq = mp_opt->subflow_seq;
+               if (mp_opt->mpc_map) {
+                       struct mptcp_subflow_context *subflow =
+                               mptcp_subflow_ctx(sk);
+
+                       /* this is an MP_CAPABLE carrying MPTCP data
+                        * we know this maps the first chunk of data
+                        */
+                       mptcp_crypto_key_sha(subflow->remote_key, NULL,
+                                            &mpext->data_seq);
+                       mpext->data_seq++;
+                       mpext->subflow_seq = 1;
+                       mpext->dsn64 = 1;
+                       mpext->mpc_map = 1;
+               } else {
+                       mpext->data_seq = mp_opt->data_seq;
+                       mpext->subflow_seq = mp_opt->subflow_seq;
+                       mpext->dsn64 = mp_opt->dsn64;
+               }
                mpext->data_len = mp_opt->data_len;
                mpext->use_map = 1;
-               mpext->dsn64 = mp_opt->dsn64;
        }
 
        if (mp_opt->use_ack) {
@@ -389,8 +473,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 
 void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
 {
-       if ((OPTION_MPTCP_MPC_SYN |
-            OPTION_MPTCP_MPC_SYNACK |
+       if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
             OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
                u8 len;
 
@@ -398,6 +481,8 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
                        len = TCPOLEN_MPTCP_MPC_SYN;
                else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
                        len = TCPOLEN_MPTCP_MPC_SYNACK;
+               else if (opts->ext_copy.data_len)
+                       len = TCPOLEN_MPTCP_MPC_ACK_DATA;
                else
                        len = TCPOLEN_MPTCP_MPC_ACK;
 
@@ -405,14 +490,27 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
                               (MPTCPOPT_MP_CAPABLE << 12) |
                               (MPTCP_SUPPORTED_VERSION << 8) |
                               MPTCP_CAP_HMAC_SHA256);
+
+               if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
+                   opts->suboptions))
+                       goto mp_capable_done;
+
                put_unaligned_be64(opts->sndr_key, ptr);
                ptr += 2;
-               if (OPTION_MPTCP_MPC_ACK & opts->suboptions) {
-                       put_unaligned_be64(opts->rcvr_key, ptr);
-                       ptr += 2;
-               }
+               if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
+                       goto mp_capable_done;
+
+               put_unaligned_be64(opts->rcvr_key, ptr);
+               ptr += 2;
+               if (!opts->ext_copy.data_len)
+                       goto mp_capable_done;
+
+               put_unaligned_be32(opts->ext_copy.data_len << 16 |
+                                  TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+               ptr += 1;
        }
 
+mp_capable_done:
        if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
                struct mptcp_ext *mpext = &opts->ext_copy;
                u8 len = TCPOLEN_MPTCP_DSS_BASE;
index a355bb1cf31bdde567fc08ae910d5765aeba6cd0..36b90024d34d82459857d37f482f5e8d243580a6 100644 (file)
@@ -11,7 +11,7 @@
 #include <net/tcp.h>
 #include <net/inet_connection_sock.h>
 
-#define MPTCP_SUPPORTED_VERSION        0
+#define MPTCP_SUPPORTED_VERSION        1
 
 /* MPTCP option bits */
 #define OPTION_MPTCP_MPC_SYN   BIT(0)
 #define MPTCPOPT_MP_FASTCLOSE  7
 
 /* MPTCP suboption lengths */
-#define TCPOLEN_MPTCP_MPC_SYN          12
+#define TCPOLEN_MPTCP_MPC_SYN          4
 #define TCPOLEN_MPTCP_MPC_SYNACK       12
 #define TCPOLEN_MPTCP_MPC_ACK          20
+#define TCPOLEN_MPTCP_MPC_ACK_DATA     22
 #define TCPOLEN_MPTCP_DSS_BASE         4
 #define TCPOLEN_MPTCP_DSS_ACK32                4
 #define TCPOLEN_MPTCP_DSS_ACK64                8
@@ -106,6 +107,7 @@ struct mptcp_subflow_context {
        u64     remote_key;
        u64     idsn;
        u64     map_seq;
+       u32     snd_isn;
        u32     token;
        u32     rel_write_seq;
        u32     map_subflow_seq;
index 9fb3eb87a20faec46181e93ae3176d6a1a58eb34..8892855f4f52edb302c7b98c06400ca274427faa 100644 (file)
@@ -77,7 +77,6 @@ static void subflow_init_req(struct request_sock *req,
                if (err == 0)
                        subflow_req->mp_capable = 1;
 
-               subflow_req->remote_key = rx_opt.mptcp.sndr_key;
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
        }
 }
@@ -180,11 +179,22 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                                          bool *own_req)
 {
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
+       struct mptcp_subflow_request_sock *subflow_req;
+       struct tcp_options_received opt_rx;
        struct sock *child;
 
        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
 
-       /* if the sk is MP_CAPABLE, we already received the client key */
+       /* if the sk is MP_CAPABLE, we need to fetch the client key */
+       subflow_req = mptcp_subflow_rsk(req);
+       if (subflow_req->mp_capable) {
+               opt_rx.mptcp.mp_capable = 0;
+               mptcp_get_options(skb, &opt_rx);
+               if (!opt_rx.mptcp.mp_capable)
+                       subflow_req->mp_capable = 0;
+               else
+                       subflow_req->remote_key = opt_rx.mptcp.sndr_key;
+       }
 
        child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
                                                     req_unhash, own_req);