/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Inspur Corporation
 */

#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_udp.h>

#include "gro_vxlan_udp4.h"
  12
  13void *
  14gro_vxlan_udp4_tbl_create(uint16_t socket_id,
  15                uint16_t max_flow_num,
  16                uint16_t max_item_per_flow)
  17{
  18        struct gro_vxlan_udp4_tbl *tbl;
  19        size_t size;
  20        uint32_t entries_num, i;
  21
  22        entries_num = max_flow_num * max_item_per_flow;
  23        entries_num = RTE_MIN(entries_num, GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM);
  24
  25        if (entries_num == 0)
  26                return NULL;
  27
  28        tbl = rte_zmalloc_socket(__func__,
  29                        sizeof(struct gro_vxlan_udp4_tbl),
  30                        RTE_CACHE_LINE_SIZE,
  31                        socket_id);
  32        if (tbl == NULL)
  33                return NULL;
  34
  35        size = sizeof(struct gro_vxlan_udp4_item) * entries_num;
  36        tbl->items = rte_zmalloc_socket(__func__,
  37                        size,
  38                        RTE_CACHE_LINE_SIZE,
  39                        socket_id);
  40        if (tbl->items == NULL) {
  41                rte_free(tbl);
  42                return NULL;
  43        }
  44        tbl->max_item_num = entries_num;
  45
  46        size = sizeof(struct gro_vxlan_udp4_flow) * entries_num;
  47        tbl->flows = rte_zmalloc_socket(__func__,
  48                        size,
  49                        RTE_CACHE_LINE_SIZE,
  50                        socket_id);
  51        if (tbl->flows == NULL) {
  52                rte_free(tbl->items);
  53                rte_free(tbl);
  54                return NULL;
  55        }
  56
  57        for (i = 0; i < entries_num; i++)
  58                tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
  59        tbl->max_flow_num = entries_num;
  60
  61        return tbl;
  62}
  63
  64void
  65gro_vxlan_udp4_tbl_destroy(void *tbl)
  66{
  67        struct gro_vxlan_udp4_tbl *vxlan_tbl = tbl;
  68
  69        if (vxlan_tbl) {
  70                rte_free(vxlan_tbl->items);
  71                rte_free(vxlan_tbl->flows);
  72        }
  73        rte_free(vxlan_tbl);
  74}
  75
  76static inline uint32_t
  77find_an_empty_item(struct gro_vxlan_udp4_tbl *tbl)
  78{
  79        uint32_t max_item_num = tbl->max_item_num, i;
  80
  81        for (i = 0; i < max_item_num; i++)
  82                if (tbl->items[i].inner_item.firstseg == NULL)
  83                        return i;
  84        return INVALID_ARRAY_INDEX;
  85}
  86
  87static inline uint32_t
  88find_an_empty_flow(struct gro_vxlan_udp4_tbl *tbl)
  89{
  90        uint32_t max_flow_num = tbl->max_flow_num, i;
  91
  92        for (i = 0; i < max_flow_num; i++)
  93                if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
  94                        return i;
  95        return INVALID_ARRAY_INDEX;
  96}
  97
  98static inline uint32_t
  99insert_new_item(struct gro_vxlan_udp4_tbl *tbl,
 100                struct rte_mbuf *pkt,
 101                uint64_t start_time,
 102                uint32_t prev_idx,
 103                uint16_t frag_offset,
 104                uint8_t is_last_frag)
 105{
 106        uint32_t item_idx;
 107
 108        item_idx = find_an_empty_item(tbl);
 109        if (unlikely(item_idx == INVALID_ARRAY_INDEX))
 110                return INVALID_ARRAY_INDEX;
 111
 112        tbl->items[item_idx].inner_item.firstseg = pkt;
 113        tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
 114        tbl->items[item_idx].inner_item.start_time = start_time;
 115        tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
 116        tbl->items[item_idx].inner_item.frag_offset = frag_offset;
 117        tbl->items[item_idx].inner_item.is_last_frag = is_last_frag;
 118        tbl->items[item_idx].inner_item.nb_merged = 1;
 119        tbl->item_num++;
 120
 121        /* If the previous packet exists, chain the new one with it. */
 122        if (prev_idx != INVALID_ARRAY_INDEX) {
 123                tbl->items[item_idx].inner_item.next_pkt_idx =
 124                        tbl->items[prev_idx].inner_item.next_pkt_idx;
 125                tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
 126        }
 127
 128        return item_idx;
 129}
 130
 131static inline uint32_t
 132delete_item(struct gro_vxlan_udp4_tbl *tbl,
 133                uint32_t item_idx,
 134                uint32_t prev_item_idx)
 135{
 136        uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
 137
 138        /* NULL indicates an empty item. */
 139        tbl->items[item_idx].inner_item.firstseg = NULL;
 140        tbl->item_num--;
 141        if (prev_item_idx != INVALID_ARRAY_INDEX)
 142                tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
 143
 144        return next_idx;
 145}
 146
 147static inline uint32_t
 148insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
 149                struct vxlan_udp4_flow_key *src,
 150                uint32_t item_idx)
 151{
 152        struct vxlan_udp4_flow_key *dst;
 153        uint32_t flow_idx;
 154
 155        flow_idx = find_an_empty_flow(tbl);
 156        if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
 157                return INVALID_ARRAY_INDEX;
 158
 159        dst = &(tbl->flows[flow_idx].key);
 160
 161        rte_ether_addr_copy(&(src->inner_key.eth_saddr),
 162                        &(dst->inner_key.eth_saddr));
 163        rte_ether_addr_copy(&(src->inner_key.eth_daddr),
 164                        &(dst->inner_key.eth_daddr));
 165        dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
 166        dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
 167        dst->inner_key.ip_id = src->inner_key.ip_id;
 168
 169        dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
 170        dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
 171        rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
 172        rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
 173        dst->outer_ip_src_addr = src->outer_ip_src_addr;
 174        dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
 175        dst->outer_dst_port = src->outer_dst_port;
 176
 177        tbl->flows[flow_idx].start_index = item_idx;
 178        tbl->flow_num++;
 179
 180        return flow_idx;
 181}
 182
 183static inline int
 184is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
 185                struct vxlan_udp4_flow_key k2)
 186{
 187        /* For VxLAN packet, outer udp src port is calculated from
 188         * inner packet RSS hash, udp src port of the first UDP
 189         * fragment is different from one of other UDP fragments
 190         * even if they are same flow, so we have to skip outer udp
 191         * src port comparison here.
 192         */
 193        return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
 194                                        &k2.outer_eth_saddr) &&
 195                        rte_is_same_ether_addr(&k1.outer_eth_daddr,
 196                                &k2.outer_eth_daddr) &&
 197                        (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
 198                        (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
 199                        (k1.outer_dst_port == k2.outer_dst_port) &&
 200                        (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
 201                        (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
 202                        is_same_udp4_flow(k1.inner_key, k2.inner_key));
 203}
 204
 205static inline int
 206udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
 207                uint16_t frag_offset,
 208                uint16_t ip_dl)
 209{
 210        struct rte_mbuf *pkt = item->inner_item.firstseg;
 211        int cmp;
 212        uint16_t l2_offset;
 213        int ret = 0;
 214
 215        /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
 216         * we needn't compare outer_ip_id because they are same,
 217         * for the case outer_is_atomic is 1, we also have no way
 218         * to compare outer_ip_id because the difference between
 219         * outer_ip_ids of two received packets isn't always +/-1.
 220         * So skip outer_ip_id comparison here.
 221         */
 222
 223        l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
 224        cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
 225                                        l2_offset);
 226        if (cmp > 0)
 227                /* Append the new packet. */
 228                ret = 1;
 229        else if (cmp < 0)
 230                /* Prepend the new packet. */
 231                ret = -1;
 232
 233        return ret;
 234}
 235
 236static inline int
 237merge_two_vxlan_udp4_packets(struct gro_vxlan_udp4_item *item,
 238                struct rte_mbuf *pkt,
 239                int cmp,
 240                uint16_t frag_offset,
 241                uint8_t is_last_frag)
 242{
 243        if (merge_two_udp4_packets(&item->inner_item, pkt, cmp, frag_offset,
 244                                is_last_frag,
 245                                pkt->outer_l2_len + pkt->outer_l3_len)) {
 246                return 1;
 247        }
 248
 249        return 0;
 250}
 251
/*
 * Rewrite the length fields of the outer IPv4, outer UDP and inner IPv4
 * headers of a merged packet so they match the enlarged payload, and
 * clear the inner MF flag when the last fragment has been merged in.
 * Called from the flush path only for items with nb_merged > 1.
 */
static inline void
update_vxlan_header(struct gro_vxlan_udp4_item *item)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_udp_hdr *udp_hdr;
	struct rte_mbuf *pkt = item->inner_item.firstseg;
	uint16_t len;
	uint16_t frag_offset;

	/* Update the outer IPv4 header. */
	len = pkt->pkt_len - pkt->outer_l2_len;
	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
			pkt->outer_l2_len);
	ipv4_hdr->total_length = rte_cpu_to_be_16(len);

	/* Update the outer UDP header. */
	len -= pkt->outer_l3_len;
	udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
	udp_hdr->dgram_len = rte_cpu_to_be_16(len);

	/* Update the inner IPv4 header.  l2_len spans the outer UDP header,
	 * the VxLAN header and the inner Ethernet header (see the comment in
	 * gro_vxlan_udp4_reassemble()), so the inner IPv4 header sits l2_len
	 * bytes past the outer UDP header.
	 */
	len -= pkt->l2_len;
	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
	ipv4_hdr->total_length = rte_cpu_to_be_16(len);

	/* Clear MF bit if it is last fragment */
	if (item->inner_item.is_last_frag) {
		frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
		ipv4_hdr->fragment_offset =
			rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG);
	}
}
 284
/*
 * Try to reassemble an inner-fragmented VxLAN/UDP4 packet into the GRO
 * table.
 *
 * Returns:
 *   1  - pkt was merged into an existing item (caller must not use pkt),
 *   0  - pkt was stored in the table as a new item,
 *  -1  - pkt cannot be processed (not an inner fragment, empty payload,
 *        or the table is full); caller keeps ownership of pkt.
 */
int32_t
gro_vxlan_udp4_reassemble(struct rte_mbuf *pkt,
		struct gro_vxlan_udp4_tbl *tbl,
		uint64_t start_time)
{
	struct rte_ether_hdr *outer_eth_hdr, *eth_hdr;
	struct rte_ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
	struct rte_udp_hdr *udp_hdr;
	struct rte_vxlan_hdr *vxlan_hdr;
	uint16_t frag_offset;
	uint8_t is_last_frag;
	int16_t ip_dl;
	uint16_t ip_id;

	struct vxlan_udp4_flow_key key;
	uint32_t cur_idx, prev_idx, item_idx;
	uint32_t i, max_flow_num, remaining_flow_num;
	int cmp;
	uint16_t hdr_len;
	uint8_t find;

	/* Locate every header using the mbuf's precomputed l2/l3 lengths. */
	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
	outer_ipv4_hdr = (struct rte_ipv4_hdr *)((char *)outer_eth_hdr +
			pkt->outer_l2_len);

	udp_hdr = (struct rte_udp_hdr *)((char *)outer_ipv4_hdr +
			pkt->outer_l3_len);
	vxlan_hdr = (struct rte_vxlan_hdr *)((char *)udp_hdr +
			sizeof(struct rte_udp_hdr));
	eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_hdr +
			sizeof(struct rte_vxlan_hdr));
	/* l2_len = outer udp hdr len + vxlan hdr len + inner l2 len */
	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);

	/*
	 * Don't process the packet which has non-fragment inner IP.
	 */
	if (!is_ipv4_fragment(ipv4_hdr))
		return -1;

	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
		pkt->l3_len;
	/*
	 * Don't process the packet whose payload length is less than or
	 * equal to 0.
	 */
	if (pkt->pkt_len <= hdr_len)
		return -1;

	/* Inner IP payload length (fragment data length). */
	ip_dl = pkt->pkt_len - hdr_len;

	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
	frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
	/* MF clear means this is the datagram's last fragment. */
	is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0;
	/* Convert the offset field from 8-byte units to bytes. */
	frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3;

	/* Build the lookup key: inner flow identity... */
	rte_ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
	rte_ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
	key.inner_key.ip_id = ip_id;

	/* ...plus the outer tunnel identity. */
	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
	rte_ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
	rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
	/* Note: It is unnecessary to save outer_src_port here because it can
	 * be different for VxLAN UDP fragments from the same flow.
	 */
	key.outer_dst_port = udp_hdr->dst_port;

	/* Search for a matched flow. */
	max_flow_num = tbl->max_flow_num;
	remaining_flow_num = tbl->flow_num;
	find = 0;
	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
			if (is_same_vxlan_udp4_flow(tbl->flows[i].key, key)) {
				find = 1;
				break;
			}
			remaining_flow_num--;
		}
	}

	/*
	 * Can't find a matched flow. Insert a new flow and store the
	 * packet into the flow.
	 */
	if (find == 0) {
		item_idx = insert_new_item(tbl, pkt, start_time,
				INVALID_ARRAY_INDEX, frag_offset,
				is_last_frag);
		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
			return -1;
		if (insert_new_flow(tbl, &key, item_idx) ==
				INVALID_ARRAY_INDEX) {
			/*
			 * Fail to insert a new flow, so
			 * delete the inserted packet.
			 */
			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
			return -1;
		}
		return 0;
	}

	/* Check all packets in the flow and try to find a neighbor. */
	cur_idx = tbl->flows[i].start_index;
	prev_idx = cur_idx;
	do {
		cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
				frag_offset, ip_dl);
		if (cmp) {
			if (merge_two_vxlan_udp4_packets(
						&(tbl->items[cur_idx]),
						pkt, cmp, frag_offset,
						is_last_frag)) {
				return 1;
			}
			/*
			 * Can't merge two packets, as the packet
			 * length will be greater than the max value.
			 * Insert the packet into the flow.
			 */
			if (insert_new_item(tbl, pkt, start_time, prev_idx,
						frag_offset, is_last_frag) ==
					INVALID_ARRAY_INDEX)
				return -1;
			return 0;
		}

		/* Ensure inserted items are ordered by frag_offset */
		if (frag_offset
			< tbl->items[cur_idx].inner_item.frag_offset) {
			break;
		}

		prev_idx = cur_idx;
		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
	} while (cur_idx != INVALID_ARRAY_INDEX);

	/* Can't find neighbor. Insert the packet into the flow. */
	if (cur_idx == tbl->flows[i].start_index) {
		/* Insert it before the first packet of the flow */
		item_idx = insert_new_item(tbl, pkt, start_time,
				INVALID_ARRAY_INDEX, frag_offset,
				is_last_frag);
		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
			return -1;
		tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
		tbl->flows[i].start_index = item_idx;
	} else {
		/* Insert after prev_idx to keep the chain offset-ordered. */
		if (insert_new_item(tbl, pkt, start_time, prev_idx,
					frag_offset, is_last_frag
					) == INVALID_ARRAY_INDEX)
			return -1;
	}

	return 0;
}
 448
/*
 * At flush time, walk the flow chain starting at @start_idx and merge
 * every directly adjacent following fragment into the start item.  The
 * chain is ordered by frag_offset (see gro_vxlan_udp4_reassemble()), so
 * the walk stops at the first item that is not a neighbor or cannot be
 * merged.  Merged items are deleted from the table.  Always returns 0.
 */
static int
gro_vxlan_udp4_merge_items(struct gro_vxlan_udp4_tbl *tbl,
			   uint32_t start_idx)
{
	uint16_t frag_offset;
	uint8_t is_last_frag;
	int16_t ip_dl;
	struct rte_mbuf *pkt;
	int cmp;
	uint32_t item_idx;
	uint16_t hdr_len;

	item_idx = tbl->items[start_idx].inner_item.next_pkt_idx;
	while (item_idx != INVALID_ARRAY_INDEX) {
		/* Recompute the candidate's fragment data length from its
		 * stored headers.
		 */
		pkt = tbl->items[item_idx].inner_item.firstseg;
		hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
			pkt->l3_len;
		ip_dl = pkt->pkt_len - hdr_len;
		frag_offset = tbl->items[item_idx].inner_item.frag_offset;
		is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
		cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
					frag_offset, ip_dl);
		if (cmp) {
			if (merge_two_vxlan_udp4_packets(
					&(tbl->items[start_idx]),
					pkt, cmp, frag_offset,
					is_last_frag)) {
				/* The merged item is freed; relink the start
				 * item to whatever followed it and continue.
				 */
				item_idx = delete_item(tbl, item_idx,
							INVALID_ARRAY_INDEX);
				tbl->items[start_idx].inner_item.next_pkt_idx
					= item_idx;
			} else
				return 0;
		} else
			return 0;
	}

	return 0;
}
 488
/*
 * Flush packets whose start_time is <= @flush_timestamp into @out
 * (at most @nb_out entries).  Before a packet is emitted, any adjacent
 * fragments queued behind it are merged and, when a merge happened, its
 * headers are fixed up.  Returns the number of packets written to @out.
 */
uint16_t
gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
		uint64_t flush_timestamp,
		struct rte_mbuf **out,
		uint16_t nb_out)
{
	uint16_t k = 0;
	uint32_t i, j;
	uint32_t max_flow_num = tbl->max_flow_num;

	for (i = 0; i < max_flow_num; i++) {
		/* Nothing left anywhere in the table: stop early. */
		if (unlikely(tbl->flow_num == 0))
			return k;

		j = tbl->flows[i].start_index;
		while (j != INVALID_ARRAY_INDEX) {
			if (tbl->items[j].inner_item.start_time <=
					flush_timestamp) {
				gro_vxlan_udp4_merge_items(tbl, j);
				out[k++] = tbl->items[j].inner_item.firstseg;
				/* Only touch headers if a merge occurred. */
				if (tbl->items[j].inner_item.nb_merged > 1)
					update_vxlan_header(&(tbl->items[j]));
				/*
				 * Delete the item and get the next packet
				 * index.
				 */
				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
				tbl->flows[i].start_index = j;
				/* Flow becomes empty once its chain is gone. */
				if (j == INVALID_ARRAY_INDEX)
					tbl->flow_num--;

				if (unlikely(k == nb_out))
					return k;
			} else
				/*
				 * Flushing packets does not strictly follow
				 * timestamp. It does not flush left packets of
				 * the flow this time once it finds one item
				 * whose start_time is greater than
				 * flush_timestamp. So go to check other flows.
				 */
				break;
		}
	}
	return k;
}
 535
 536uint32_t
 537gro_vxlan_udp4_tbl_pkt_count(void *tbl)
 538{
 539        struct gro_vxlan_udp4_tbl *gro_tbl = tbl;
 540
 541        if (gro_tbl)
 542                return gro_tbl->item_num;
 543
 544        return 0;
 545}
 546