linux/include/net/inet_ecn.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _INET_ECN_H_
   3#define _INET_ECN_H_
   4
   5#include <linux/ip.h>
   6#include <linux/skbuff.h>
   7#include <linux/if_vlan.h>
   8
   9#include <net/inet_sock.h>
  10#include <net/dsfield.h>
  11#include <net/checksum.h>
  12
  13enum {
  14        INET_ECN_NOT_ECT = 0,
  15        INET_ECN_ECT_1 = 1,
  16        INET_ECN_ECT_0 = 2,
  17        INET_ECN_CE = 3,
  18        INET_ECN_MASK = 3,
  19};
  20
  21extern int sysctl_tunnel_ecn_log;
  22
  23static inline int INET_ECN_is_ce(__u8 dsfield)
  24{
  25        return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
  26}
  27
  28static inline int INET_ECN_is_not_ect(__u8 dsfield)
  29{
  30        return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT;
  31}
  32
  33static inline int INET_ECN_is_capable(__u8 dsfield)
  34{
  35        return dsfield & INET_ECN_ECT_0;
  36}
  37
  38/*
  39 * RFC 3168 9.1.1
  40 *  The full-functionality option for ECN encapsulation is to copy the
  41 *  ECN codepoint of the inside header to the outside header on
  42 *  encapsulation if the inside header is not-ECT or ECT, and to set the
  43 *  ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
  44 *  the inside header is CE.
  45 */
  46static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
  47{
  48        outer &= ~INET_ECN_MASK;
  49        outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) :
  50                                          INET_ECN_ECT_0;
  51        return outer;
  52}
  53
  54static inline void INET_ECN_xmit(struct sock *sk)
  55{
  56        inet_sk(sk)->tos |= INET_ECN_ECT_0;
  57        if (inet6_sk(sk) != NULL)
  58                inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
  59}
  60
  61static inline void INET_ECN_dontxmit(struct sock *sk)
  62{
  63        inet_sk(sk)->tos &= ~INET_ECN_MASK;
  64        if (inet6_sk(sk) != NULL)
  65                inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
  66}
  67
  68#define IP6_ECN_flow_init(label) do {           \
  69      (label) &= ~htonl(INET_ECN_MASK << 20);   \
  70    } while (0)
  71
  72#define IP6_ECN_flow_xmit(sk, label) do {                               \
  73        if (INET_ECN_is_capable(inet6_sk(sk)->tclass))                  \
  74                (label) |= htonl(INET_ECN_ECT_0 << 20);                 \
  75    } while (0)
  76
  77static inline int IP_ECN_set_ce(struct iphdr *iph)
  78{
  79        u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
  80        __be16 check_add;
  81
  82        /*
  83         * After the last operation we have (in binary):
  84         * INET_ECN_NOT_ECT => 01
  85         * INET_ECN_ECT_1   => 10
  86         * INET_ECN_ECT_0   => 11
  87         * INET_ECN_CE      => 00
  88         */
  89        if (!(ecn & 2))
  90                return !ecn;
  91
  92        /*
  93         * The following gives us:
  94         * INET_ECN_ECT_1 => check += htons(0xFFFD)
  95         * INET_ECN_ECT_0 => check += htons(0xFFFE)
  96         */
  97        check_add = (__force __be16)((__force u16)htons(0xFFFB) +
  98                                     (__force u16)htons(ecn));
  99
 100        iph->check = csum16_add(iph->check, check_add);
 101        iph->tos |= INET_ECN_CE;
 102        return 1;
 103}
 104
 105static inline int IP_ECN_set_ect1(struct iphdr *iph)
 106{
 107        if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
 108                return 0;
 109
 110        iph->check = csum16_add(iph->check, htons(0x1));
 111        iph->tos ^= INET_ECN_MASK;
 112        return 1;
 113}
 114
 115static inline void IP_ECN_clear(struct iphdr *iph)
 116{
 117        iph->tos &= ~INET_ECN_MASK;
 118}
 119
 120static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner)
 121{
 122        dscp &= ~INET_ECN_MASK;
 123        ipv4_change_dsfield(inner, INET_ECN_MASK, dscp);
 124}
 125
 126struct ipv6hdr;
 127
 128/* Note:
 129 * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE,
 130 * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE
 131 * In IPv6 case, no checksum compensates the change in IPv6 header,
 132 * so we have to update skb->csum.
 133 */
 134static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
 135{
 136        __be32 from, to;
 137
 138        if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
 139                return 0;
 140
 141        from = *(__be32 *)iph;
 142        to = from | htonl(INET_ECN_CE << 20);
 143        *(__be32 *)iph = to;
 144        if (skb->ip_summed == CHECKSUM_COMPLETE)
 145                skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
 146                                     (__force __wsum)to);
 147        return 1;
 148}
 149
 150static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
 151{
 152        __be32 from, to;
 153
 154        if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
 155                return 0;
 156
 157        from = *(__be32 *)iph;
 158        to = from ^ htonl(INET_ECN_MASK << 20);
 159        *(__be32 *)iph = to;
 160        if (skb->ip_summed == CHECKSUM_COMPLETE)
 161                skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
 162                                     (__force __wsum)to);
 163        return 1;
 164}
 165
 166static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
 167{
 168        dscp &= ~INET_ECN_MASK;
 169        ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
 170}
 171
 172static inline int INET_ECN_set_ce(struct sk_buff *skb)
 173{
 174        switch (skb_protocol(skb, true)) {
 175        case cpu_to_be16(ETH_P_IP):
 176                if (skb_network_header(skb) + sizeof(struct iphdr) <=
 177                    skb_tail_pointer(skb))
 178                        return IP_ECN_set_ce(ip_hdr(skb));
 179                break;
 180
 181        case cpu_to_be16(ETH_P_IPV6):
 182                if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
 183                    skb_tail_pointer(skb))
 184                        return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
 185                break;
 186        }
 187
 188        return 0;
 189}
 190
 191static inline int INET_ECN_set_ect1(struct sk_buff *skb)
 192{
 193        switch (skb_protocol(skb, true)) {
 194        case cpu_to_be16(ETH_P_IP):
 195                if (skb_network_header(skb) + sizeof(struct iphdr) <=
 196                    skb_tail_pointer(skb))
 197                        return IP_ECN_set_ect1(ip_hdr(skb));
 198                break;
 199
 200        case cpu_to_be16(ETH_P_IPV6):
 201                if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
 202                    skb_tail_pointer(skb))
 203                        return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
 204                break;
 205        }
 206
 207        return 0;
 208}
 209
 210/*
 211 * RFC 6040 4.2
 212 *  To decapsulate the inner header at the tunnel egress, a compliant
 213 *  tunnel egress MUST set the outgoing ECN field to the codepoint at the
 214 *  intersection of the appropriate arriving inner header (row) and outer
 215 *  header (column) in Figure 4
 216 *
 217 *      +---------+------------------------------------------------+
 218 *      |Arriving |            Arriving Outer Header               |
 219 *      |   Inner +---------+------------+------------+------------+
 220 *      |  Header | Not-ECT | ECT(0)     | ECT(1)     |     CE     |
 221 *      +---------+---------+------------+------------+------------+
 222 *      | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
 223 *      |  ECT(0) |  ECT(0) | ECT(0)     | ECT(1)     |     CE     |
 224 *      |  ECT(1) |  ECT(1) | ECT(1) (!) | ECT(1)     |     CE     |
 225 *      |    CE   |      CE |     CE     |     CE(!!!)|     CE     |
 226 *      +---------+---------+------------+------------+------------+
 227 *
 228 *             Figure 4: New IP in IP Decapsulation Behaviour
 229 *
 230 *  returns 0 on success
 231 *          1 if something is broken and should be logged (!!! above)
 232 *          2 if packet should be dropped
 233 */
 234static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
 235{
 236        if (INET_ECN_is_not_ect(inner)) {
 237                switch (outer & INET_ECN_MASK) {
 238                case INET_ECN_NOT_ECT:
 239                        return 0;
 240                case INET_ECN_ECT_0:
 241                case INET_ECN_ECT_1:
 242                        return 1;
 243                case INET_ECN_CE:
 244                        return 2;
 245                }
 246        }
 247
 248        *set_ce = INET_ECN_is_ce(outer);
 249        return 0;
 250}
 251
 252static inline int INET_ECN_decapsulate(struct sk_buff *skb,
 253                                       __u8 outer, __u8 inner)
 254{
 255        bool set_ce = false;
 256        int rc;
 257
 258        rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
 259        if (!rc) {
 260                if (set_ce)
 261                        INET_ECN_set_ce(skb);
 262                else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
 263                        INET_ECN_set_ect1(skb);
 264        }
 265
 266        return rc;
 267}
 268
 269static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
 270                                     struct sk_buff *skb)
 271{
 272        __u8 inner;
 273
 274        switch (skb_protocol(skb, true)) {
 275        case htons(ETH_P_IP):
 276                inner = ip_hdr(skb)->tos;
 277                break;
 278        case htons(ETH_P_IPV6):
 279                inner = ipv6_get_dsfield(ipv6_hdr(skb));
 280                break;
 281        default:
 282                return 0;
 283        }
 284
 285        return INET_ECN_decapsulate(skb, oiph->tos, inner);
 286}
 287
 288static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
 289                                      struct sk_buff *skb)
 290{
 291        __u8 inner;
 292
 293        switch (skb_protocol(skb, true)) {
 294        case htons(ETH_P_IP):
 295                inner = ip_hdr(skb)->tos;
 296                break;
 297        case htons(ETH_P_IPV6):
 298                inner = ipv6_get_dsfield(ipv6_hdr(skb));
 299                break;
 300        default:
 301                return 0;
 302        }
 303
 304        return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
 305}
 306#endif
 307