linux/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
   1/*
   2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include <linux/etherdevice.h>
  34#include <linux/mlx5/driver.h>
  35#include <linux/mlx5/mlx5_ifc.h>
  36#include <linux/mlx5/vport.h>
  37#include <linux/mlx5/fs.h>
  38#include "mlx5_core.h"
  39#include "eswitch.h"
  40#include "rdma.h"
  41#include "en.h"
  42#include "fs_core.h"
  43#include "lib/devcom.h"
  44#include "ecpf.h"
  45#include "lib/eq.h"
  46
  47/* There are two match-all miss flows, one for unicast dst mac and
  48 * one for multicast.
  49 */
  50#define MLX5_ESW_MISS_FLOWS (2)
  51
  52#define fdb_prio_table(esw, chain, prio, level) \
  53        (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
  54
  55#define UPLINK_REP_INDEX 0
  56
  57static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
  58                                                     u16 vport_num)
  59{
  60        int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
  61
  62        WARN_ON(idx > esw->total_vports - 1);
  63        return &esw->offloads.vport_reps[idx];
  64}
  65
  66static struct mlx5_flow_table *
  67esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
  68static void
  69esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
  70
  71bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw)
  72{
  73        return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED));
  74}
  75
  76u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw)
  77{
  78        if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
  79                return FDB_MAX_CHAIN;
  80
  81        return 0;
  82}
  83
  84u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
  85{
  86        if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
  87                return FDB_MAX_PRIO;
  88
  89        return 1;
  90}
  91
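/* Translate an offloaded flow (spec + attr) into an FDB rule: build the
 * destination list (forward vports, goto chain, flow counter), match on the
 * e-switch source port (and owner vhca id on a merged e-switch), and install
 * the rule in the table of the flow's chain/prio, taking a reference on it.
 */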
  92struct mlx5_flow_handle *
  93mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
  94                                struct mlx5_flow_spec *spec,
  95                                struct mlx5_esw_flow_attr *attr)
  96{
  97        struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
  98        struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
  99        bool split = !!(attr->split_count);
 100        struct mlx5_flow_handle *rule;
 101        struct mlx5_flow_table *fdb;
 102        int j, i = 0;
 103        void *misc;
 104
 105        if (esw->mode != SRIOV_OFFLOADS)
 106                return ERR_PTR(-EOPNOTSUPP);
 107
 108        flow_act.action = attr->action;
 109        /* if per flow vlan pop/push is emulated, don't set that into the firmware */
 110        if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
 111                flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
 112                                     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 113        else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
 114                flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]);
 115                flow_act.vlan[0].vid = attr->vlan_vid[0];
 116                flow_act.vlan[0].prio = attr->vlan_prio[0];
 117                if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
 118                        flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]);
 119                        flow_act.vlan[1].vid = attr->vlan_vid[1];
 120                        flow_act.vlan[1].prio = attr->vlan_prio[1];
 121                }
 122        }
 123
 124        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 125                if (attr->dest_chain) {
 126                        struct mlx5_flow_table *ft;
 127
 128                        ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0);
 129                        if (IS_ERR(ft)) {
 130                                rule = ERR_CAST(ft);
 131                                goto err_create_goto_table;
 132                        }
 133
 134                        dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 135                        dest[i].ft = ft;
 136                        i++;
 137                } else {
 138                        for (j = attr->split_count; j < attr->out_count; j++) {
 139                                dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 140                                dest[i].vport.num = attr->dests[j].rep->vport;
 141                                dest[i].vport.vhca_id =
 142                                        MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
 143                                if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 144                                        dest[i].vport.flags |=
 145                                                MLX5_FLOW_DEST_VPORT_VHCA_ID;
 146                                if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
 147                                        flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 148                                        flow_act.reformat_id = attr->dests[j].encap_id;
 149                                        dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
 150                                        dest[i].vport.reformat_id =
 151                                                attr->dests[j].encap_id;
 152                                }
 153                                i++;
 154                        }
 155                }
 156        }
 157        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 158                dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 159                dest[i].counter_id = mlx5_fc_id(attr->counter);
 160                i++;
 161        }
 162
 163        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 164        MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
 165
 166        if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 167                MLX5_SET(fte_match_set_misc, misc,
 168                         source_eswitch_owner_vhca_id,
 169                         MLX5_CAP_GEN(attr->in_mdev, vhca_id));
 170
 171        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 172        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 173        if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 174                MLX5_SET_TO_ONES(fte_match_set_misc, misc,
 175                                 source_eswitch_owner_vhca_id);
 176
 177        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 178        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
 179                if (attr->tunnel_match_level != MLX5_MATCH_NONE)
 180                        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 181                if (attr->match_level != MLX5_MATCH_NONE)
 182                        spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 183        } else if (attr->match_level != MLX5_MATCH_NONE) {
 184                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 185        }
 186
 187        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 188                flow_act.modify_id = attr->mod_hdr_id;
 189
 190        fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
 191        if (IS_ERR(fdb)) {
 192                rule = ERR_CAST(fdb);
 193                goto err_esw_get;
 194        }
 195
 196        rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
 197        if (IS_ERR(rule))
 198                goto err_add_rule;
 199        else
 200                esw->offloads.num_flows++;
 201
 202        return rule;
 203
 204err_add_rule:
 205        esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
 206err_esw_get:
 207        if (attr->dest_chain)
 208                esw_put_prio_table(esw, attr->dest_chain, 1, 0);
 209err_create_goto_table:
 210        return rule;
 211}
 212
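/* Install the first part of a split rule in the level-0 table of the flow's
 * chain/prio: forward (mirror) to the first 'split_count' vport destinations,
 * with optional encap, and then to the level-1 table of the same chain/prio,
 * where the remainder of the rule is expected to be installed.
 */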
 213struct mlx5_flow_handle *
 214mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 215                          struct mlx5_flow_spec *spec,
 216                          struct mlx5_esw_flow_attr *attr)
 217{
 218        struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
 219        struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
 220        struct mlx5_flow_table *fast_fdb;
 221        struct mlx5_flow_table *fwd_fdb;
 222        struct mlx5_flow_handle *rule;
 223        void *misc;
 224        int i;
 225
 226        fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
 227        if (IS_ERR(fast_fdb)) {
 228                rule = ERR_CAST(fast_fdb);
 229                goto err_get_fast;
 230        }
 231
 232        fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1);
 233        if (IS_ERR(fwd_fdb)) {
 234                rule = ERR_CAST(fwd_fdb);
 235                goto err_get_fwd;
 236        }
 237
 238        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 239        for (i = 0; i < attr->split_count; i++) {
 240                dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 241                dest[i].vport.num = attr->dests[i].rep->vport;
 242                dest[i].vport.vhca_id =
 243                        MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
 244                if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 245                        dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 246                if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
 247                        dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
 248                        dest[i].vport.reformat_id = attr->dests[i].encap_id;
 249                }
 250        }
 251        dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
  252        dest[i].ft = fwd_fdb;
 253        i++;
 254
 255        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 256        MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
 257
 258        if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 259                MLX5_SET(fte_match_set_misc, misc,
 260                         source_eswitch_owner_vhca_id,
 261                         MLX5_CAP_GEN(attr->in_mdev, vhca_id));
 262
 263        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 264        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 265        if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
 266                MLX5_SET_TO_ONES(fte_match_set_misc, misc,
 267                                 source_eswitch_owner_vhca_id);
 268
 269        if (attr->match_level == MLX5_MATCH_NONE)
 270                spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 271        else
 272                spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
 273                                              MLX5_MATCH_MISC_PARAMETERS;
 274
 275        rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
 276
 277        if (IS_ERR(rule))
 278                goto add_err;
 279
 280        esw->offloads.num_flows++;
 281
 282        return rule;
 283add_err:
 284        esw_put_prio_table(esw, attr->chain, attr->prio, 1);
 285err_get_fwd:
 286        esw_put_prio_table(esw, attr->chain, attr->prio, 0);
 287err_get_fast:
 288        return rule;
 289}
 290
 291static void
 292__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
 293                        struct mlx5_flow_handle *rule,
 294                        struct mlx5_esw_flow_attr *attr,
 295                        bool fwd_rule)
 296{
 297        bool split = (attr->split_count > 0);
 298
 299        mlx5_del_flow_rules(rule);
 300        esw->offloads.num_flows--;
 301
 302        if (fwd_rule)  {
 303                esw_put_prio_table(esw, attr->chain, attr->prio, 1);
 304                esw_put_prio_table(esw, attr->chain, attr->prio, 0);
 305        } else {
 306                esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
 307                if (attr->dest_chain)
 308                        esw_put_prio_table(esw, attr->dest_chain, 1, 0);
 309        }
 310}
 311
 312void
 313mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
 314                                struct mlx5_flow_handle *rule,
 315                                struct mlx5_esw_flow_attr *attr)
 316{
 317        __mlx5_eswitch_del_rule(esw, rule, attr, false);
 318}
 319
 320void
 321mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
 322                          struct mlx5_flow_handle *rule,
 323                          struct mlx5_esw_flow_attr *attr)
 324{
 325        __mlx5_eswitch_del_rule(esw, rule, attr, true);
 326}
 327
 328static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 329{
 330        struct mlx5_eswitch_rep *rep;
 331        int vf_vport, err = 0;
 332
 333        esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
 334        for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
 335                rep = &esw->offloads.vport_reps[vf_vport];
 336                if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED)
 337                        continue;
 338
 339                err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
 340                if (err)
 341                        goto out;
 342        }
 343
 344out:
 345        return err;
 346}
 347
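/* Pick the vport rep whose vlan state is adjusted for this rule: the ingress
 * rep for a vlan push, the egress rep for a vlan pop, ingress otherwise.
 */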
 348static struct mlx5_eswitch_rep *
 349esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
 350{
 351        struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
 352
 353        in_rep  = attr->in_rep;
 354        out_rep = attr->dests[0].rep;
 355
 356        if (push)
 357                vport = in_rep;
 358        else if (pop)
 359                vport = out_rep;
 360        else
 361                vport = in_rep;
 362
 363        return vport;
 364}
 365
 366static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
 367                                     bool push, bool pop, bool fwd)
 368{
 369        struct mlx5_eswitch_rep *in_rep, *out_rep;
 370
 371        if ((push || pop) && !fwd)
 372                goto out_notsupp;
 373
 374        in_rep  = attr->in_rep;
 375        out_rep = attr->dests[0].rep;
 376
 377        if (push && in_rep->vport == MLX5_VPORT_UPLINK)
 378                goto out_notsupp;
 379
 380        if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
 381                goto out_notsupp;
 382
  383        /* vport has vlan push configured, can't offload VF --> wire rules without it */
 384        if (!push && !pop && fwd)
 385                if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
 386                        goto out_notsupp;
 387
 388        /* protects against (1) setting rules with different vlans to push and
  389         * (2) setting rules without vlans (attr->vlan = 0) && with vlans to push (!= 0)
 390         */
 391        if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0]))
 392                goto out_notsupp;
 393
 394        return 0;
 395
 396out_notsupp:
 397        return -EOPNOTSUPP;
 398}
 399
 400int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 401                                 struct mlx5_esw_flow_attr *attr)
 402{
 403        struct offloads_fdb *offloads = &esw->fdb_table.offloads;
 404        struct mlx5_eswitch_rep *vport = NULL;
 405        bool push, pop, fwd;
 406        int err = 0;
 407
  408        /* no-op if vlan push/pop is supported natively (non-emulation mode) */
 409        if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
 410                return 0;
 411
 412        push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
 413        pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 414        fwd  = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
 415                   !attr->dest_chain);
 416
 417        err = esw_add_vlan_action_check(attr, push, pop, fwd);
 418        if (err)
 419                return err;
 420
 421        attr->vlan_handled = false;
 422
 423        vport = esw_vlan_action_get_vport(attr, push, pop);
 424
 425        if (!push && !pop && fwd) {
 426                /* tracks VF --> wire rules without vlan push action */
 427                if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
 428                        vport->vlan_refcount++;
 429                        attr->vlan_handled = true;
 430                }
 431
 432                return 0;
 433        }
 434
 435        if (!push && !pop)
 436                return 0;
 437
 438        if (!(offloads->vlan_push_pop_refcount)) {
 439                /* it's the 1st vlan rule, apply global vlan pop policy */
 440                err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
 441                if (err)
 442                        goto out;
 443        }
 444        offloads->vlan_push_pop_refcount++;
 445
 446        if (push) {
 447                if (vport->vlan_refcount)
 448                        goto skip_set_push;
 449
 450                err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0,
 451                                                    SET_VLAN_INSERT | SET_VLAN_STRIP);
 452                if (err)
 453                        goto out;
 454                vport->vlan = attr->vlan_vid[0];
 455skip_set_push:
 456                vport->vlan_refcount++;
 457        }
 458out:
 459        if (!err)
 460                attr->vlan_handled = true;
 461        return err;
 462}
 463
 464int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 465                                 struct mlx5_esw_flow_attr *attr)
 466{
 467        struct offloads_fdb *offloads = &esw->fdb_table.offloads;
 468        struct mlx5_eswitch_rep *vport = NULL;
 469        bool push, pop, fwd;
 470        int err = 0;
 471
  472        /* no-op if vlan push/pop is supported natively (non-emulation mode) */
 473        if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
 474                return 0;
 475
 476        if (!attr->vlan_handled)
 477                return 0;
 478
 479        push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
 480        pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 481        fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
 482
 483        vport = esw_vlan_action_get_vport(attr, push, pop);
 484
 485        if (!push && !pop && fwd) {
 486                /* tracks VF --> wire rules without vlan push action */
 487                if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
 488                        vport->vlan_refcount--;
 489
 490                return 0;
 491        }
 492
 493        if (push) {
 494                vport->vlan_refcount--;
 495                if (vport->vlan_refcount)
 496                        goto skip_unset_push;
 497
 498                vport->vlan = 0;
 499                err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
 500                                                    0, 0, SET_VLAN_STRIP);
 501                if (err)
 502                        goto out;
 503        }
 504
 505skip_unset_push:
 506        offloads->vlan_push_pop_refcount--;
 507        if (offloads->vlan_push_pop_refcount)
 508                return 0;
 509
 510        /* no more vlan rules, stop global vlan pop policy */
 511        err = esw_set_global_vlan_pop(esw, 0);
 512
 513out:
 514        return err;
 515}
 516
 517struct mlx5_flow_handle *
 518mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
 519                                    u32 sqn)
 520{
 521        struct mlx5_flow_act flow_act = {0};
 522        struct mlx5_flow_destination dest = {};
 523        struct mlx5_flow_handle *flow_rule;
 524        struct mlx5_flow_spec *spec;
 525        void *misc;
 526
 527        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 528        if (!spec) {
 529                flow_rule = ERR_PTR(-ENOMEM);
 530                goto out;
 531        }
 532
 533        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 534        MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
 535        /* source vport is the esw manager */
 536        MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
 537
 538        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 539        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
 540        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 541
 542        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 543        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 544        dest.vport.num = vport;
 545        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 546
 547        flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
 548                                        &flow_act, &dest, 1);
 549        if (IS_ERR(flow_rule))
 550                esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
 551out:
 552        kvfree(spec);
 553        return flow_rule;
 554}
 555EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
 556
 557void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 558{
 559        mlx5_del_flow_rules(rule);
 560}
 561
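/* Peer miss rules (merged e-switch): match packets by the peer's vhca id and
 * source port and forward them to the peer's e-switch manager vport, so
 * traffic originating from the peer's vports that reaches the slow path FDB
 * is handed back to the peer.
 */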
 562static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
 563                                  struct mlx5_flow_spec *spec,
 564                                  struct mlx5_flow_destination *dest)
 565{
 566        void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 567                                  misc_parameters);
 568
 569        MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
 570                 MLX5_CAP_GEN(peer_dev, vhca_id));
 571
 572        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 573
 574        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 575                            misc_parameters);
 576        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 577        MLX5_SET_TO_ONES(fte_match_set_misc, misc,
 578                         source_eswitch_owner_vhca_id);
 579
 580        dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 581        dest->vport.num = peer_dev->priv.eswitch->manager_vport;
 582        dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
 583        dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 584}
 585
 586static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 587                                       struct mlx5_core_dev *peer_dev)
 588{
 589        struct mlx5_flow_destination dest = {};
 590        struct mlx5_flow_act flow_act = {0};
 591        struct mlx5_flow_handle **flows;
 592        struct mlx5_flow_handle *flow;
 593        struct mlx5_flow_spec *spec;
 594        /* total vports is the same for both e-switches */
 595        int nvports = esw->total_vports;
 596        void *misc;
 597        int err, i;
 598
 599        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 600        if (!spec)
 601                return -ENOMEM;
 602
 603        peer_miss_rules_setup(peer_dev, spec, &dest);
 604
 605        flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
 606        if (!flows) {
 607                err = -ENOMEM;
 608                goto alloc_flows_err;
 609        }
 610
 611        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 612        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 613                            misc_parameters);
 614
 615        if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
 616                MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
 617                flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 618                                           spec, &flow_act, &dest, 1);
 619                if (IS_ERR(flow)) {
 620                        err = PTR_ERR(flow);
 621                        goto add_pf_flow_err;
 622                }
 623                flows[MLX5_VPORT_PF] = flow;
 624        }
 625
 626        if (mlx5_ecpf_vport_exists(esw->dev)) {
 627                MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
 628                flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 629                                           spec, &flow_act, &dest, 1);
 630                if (IS_ERR(flow)) {
 631                        err = PTR_ERR(flow);
 632                        goto add_ecpf_flow_err;
 633                }
 634                flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
 635        }
 636
 637        mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
 638                MLX5_SET(fte_match_set_misc, misc, source_port, i);
 639                flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 640                                           spec, &flow_act, &dest, 1);
 641                if (IS_ERR(flow)) {
 642                        err = PTR_ERR(flow);
 643                        goto add_vf_flow_err;
 644                }
 645                flows[i] = flow;
 646        }
 647
 648        esw->fdb_table.offloads.peer_miss_rules = flows;
 649
 650        kvfree(spec);
 651        return 0;
 652
 653add_vf_flow_err:
 654        nvports = --i;
 655        mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports)
 656                mlx5_del_flow_rules(flows[i]);
 657
 658        if (mlx5_ecpf_vport_exists(esw->dev))
 659                mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
 660add_ecpf_flow_err:
 661        if (mlx5_core_is_ecpf_esw_manager(esw->dev))
 662                mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
 663add_pf_flow_err:
 664        esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
 665        kvfree(flows);
 666alloc_flows_err:
 667        kvfree(spec);
 668        return err;
 669}
 670
 671static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
 672{
 673        struct mlx5_flow_handle **flows;
 674        int i;
 675
 676        flows = esw->fdb_table.offloads.peer_miss_rules;
 677
 678        mlx5_esw_for_each_vf_vport_num_reverse(esw, i,
 679                                               mlx5_core_max_vfs(esw->dev))
 680                mlx5_del_flow_rules(flows[i]);
 681
 682        if (mlx5_ecpf_vport_exists(esw->dev))
 683                mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
 684
 685        if (mlx5_core_is_ecpf_esw_manager(esw->dev))
 686                mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
 687
 688        kvfree(flows);
 689}
 690
 691static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 692{
 693        struct mlx5_flow_act flow_act = {0};
 694        struct mlx5_flow_destination dest = {};
 695        struct mlx5_flow_handle *flow_rule = NULL;
 696        struct mlx5_flow_spec *spec;
 697        void *headers_c;
 698        void *headers_v;
 699        int err = 0;
 700        u8 *dmac_c;
 701        u8 *dmac_v;
 702
 703        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 704        if (!spec) {
 705                err = -ENOMEM;
 706                goto out;
 707        }
 708
 709        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 710        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 711                                 outer_headers);
 712        dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
 713                              outer_headers.dmac_47_16);
 714        dmac_c[0] = 0x01;
 715
 716        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 717        dest.vport.num = esw->manager_vport;
 718        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 719
 720        flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
 721                                        &flow_act, &dest, 1);
 722        if (IS_ERR(flow_rule)) {
 723                err = PTR_ERR(flow_rule);
 724                esw_warn(esw->dev,  "FDB: Failed to add unicast miss flow rule err %d\n", err);
 725                goto out;
 726        }
 727
 728        esw->fdb_table.offloads.miss_rule_uni = flow_rule;
 729
 730        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 731                                 outer_headers);
 732        dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
 733                              outer_headers.dmac_47_16);
 734        dmac_v[0] = 0x01;
 735        flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
 736                                        &flow_act, &dest, 1);
 737        if (IS_ERR(flow_rule)) {
 738                err = PTR_ERR(flow_rule);
 739                esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
 740                mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
 741                goto out;
 742        }
 743
 744        esw->fdb_table.offloads.miss_rule_multi = flow_rule;
 745
 746out:
 747        kvfree(spec);
 748        return err;
 749}
 750
 751#define ESW_OFFLOADS_NUM_GROUPS  4
 752
  753/* Firmware currently supports 4 pool sizes (ESW_POOLS) and a virtual
  754 * memory region of 16M (ESW_SIZE); this region is duplicated for each
  755 * flow table pool. We can allocate up to 16M from each pool, and we
  756 * keep track of how much has been used via put/get_sz_to_pool.
  757 * Firmware doesn't report any of this for now.
  758 * ESW_POOLS is expected to be sorted from large to small.
  759 */
 760#define ESW_SIZE (16 * 1024 * 1024)
 761const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024,
 762                                    64 * 1024, 4 * 1024 };
 763
 764static int
 765get_sz_from_pool(struct mlx5_eswitch *esw)
 766{
 767        int sz = 0, i;
 768
 769        for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
 770                if (esw->fdb_table.offloads.fdb_left[i]) {
 771                        --esw->fdb_table.offloads.fdb_left[i];
 772                        sz = ESW_POOLS[i];
 773                        break;
 774                }
 775        }
 776
 777        return sz;
 778}
 779
 780static void
 781put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
 782{
 783        int i;
 784
 785        for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
 786                if (sz >= ESW_POOLS[i]) {
 787                        ++esw->fdb_table.offloads.fdb_left[i];
 788                        break;
 789                }
 790        }
 791}
 792
 793static struct mlx5_flow_table *
 794create_next_size_table(struct mlx5_eswitch *esw,
 795                       struct mlx5_flow_namespace *ns,
 796                       u16 table_prio,
 797                       int level,
 798                       u32 flags)
 799{
 800        struct mlx5_flow_table *fdb;
 801        int sz;
 802
 803        sz = get_sz_from_pool(esw);
 804        if (!sz)
 805                return ERR_PTR(-ENOSPC);
 806
 807        fdb = mlx5_create_auto_grouped_flow_table(ns,
 808                                                  table_prio,
 809                                                  sz,
 810                                                  ESW_OFFLOADS_NUM_GROUPS,
 811                                                  level,
 812                                                  flags);
 813        if (IS_ERR(fdb)) {
 814                esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n",
 815                         (int)PTR_ERR(fdb), table_prio, level, sz);
 816                put_sz_to_pool(esw, sz);
 817        }
 818
 819        return fdb;
 820}
 821
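/* Return the flow table for (chain, prio, level), creating it and any lower
 * levels on first use and taking a reference on each level. Chain
 * FDB_SLOW_PATH_CHAIN maps to the pre-created slow path FDB.
 */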
 822static struct mlx5_flow_table *
 823esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
 824{
 825        struct mlx5_core_dev *dev = esw->dev;
 826        struct mlx5_flow_table *fdb = NULL;
 827        struct mlx5_flow_namespace *ns;
 828        int table_prio, l = 0;
 829        u32 flags = 0;
 830
 831        if (chain == FDB_SLOW_PATH_CHAIN)
 832                return esw->fdb_table.offloads.slow_fdb;
 833
 834        mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
 835
 836        fdb = fdb_prio_table(esw, chain, prio, level).fdb;
 837        if (fdb) {
 838                /* take ref on earlier levels as well */
 839                while (level >= 0)
 840                        fdb_prio_table(esw, chain, prio, level--).num_rules++;
 841                mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 842                return fdb;
 843        }
 844
 845        ns = mlx5_get_fdb_sub_ns(dev, chain);
 846        if (!ns) {
 847                esw_warn(dev, "Failed to get FDB sub namespace\n");
 848                mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 849                return ERR_PTR(-EOPNOTSUPP);
 850        }
 851
 852        if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
 853                flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
 854                          MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 855
 856        table_prio = (chain * FDB_MAX_PRIO) + prio - 1;
 857
 858        /* create earlier levels for correct fs_core lookup when
 859         * connecting tables
 860         */
 861        for (l = 0; l <= level; l++) {
 862                if (fdb_prio_table(esw, chain, prio, l).fdb) {
 863                        fdb_prio_table(esw, chain, prio, l).num_rules++;
 864                        continue;
 865                }
 866
 867                fdb = create_next_size_table(esw, ns, table_prio, l, flags);
 868                if (IS_ERR(fdb)) {
 869                        l--;
 870                        goto err_create_fdb;
 871                }
 872
 873                fdb_prio_table(esw, chain, prio, l).fdb = fdb;
 874                fdb_prio_table(esw, chain, prio, l).num_rules = 1;
 875        }
 876
 877        mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 878        return fdb;
 879
 880err_create_fdb:
 881        mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 882        if (l >= 0)
 883                esw_put_prio_table(esw, chain, prio, l);
 884
 885        return fdb;
 886}
 887
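/* Drop one reference on levels 'level'..0 of the (chain, prio) tables,
 * destroying each table whose reference count reaches zero and returning
 * its size to the FDB pool.
 */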
 888static void
 889esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
 890{
 891        int l;
 892
 893        if (chain == FDB_SLOW_PATH_CHAIN)
 894                return;
 895
 896        mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
 897
 898        for (l = level; l >= 0; l--) {
 899                if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0)
 900                        continue;
 901
 902                put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte);
 903                mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb);
 904                fdb_prio_table(esw, chain, prio, l).fdb = NULL;
 905        }
 906
 907        mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
 908}
 909
 910static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
 911{
 912        /* If lazy creation isn't supported, deref the fast path tables */
 913        if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) {
 914                esw_put_prio_table(esw, 0, 1, 1);
 915                esw_put_prio_table(esw, 0, 1, 0);
 916        }
 917}
 918
 919#define MAX_PF_SQ 256
 920#define MAX_SQ_NVPORTS 32
 921
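/* Create the slow path FDB table and its groups: a send-to-vport group
 * (matching source sqn and port), a peer e-switch miss group, and a
 * unicast/multicast miss group, then install the two miss rules. The fast
 * path tables are opened here only when lazy (chain/prio) creation isn't
 * supported by the device.
 */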
 922static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 923{
 924        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 925        struct mlx5_flow_table_attr ft_attr = {};
 926        struct mlx5_core_dev *dev = esw->dev;
 927        u32 *flow_group_in, max_flow_counter;
 928        struct mlx5_flow_namespace *root_ns;
 929        struct mlx5_flow_table *fdb = NULL;
 930        int table_size, ix, err = 0, i;
 931        struct mlx5_flow_group *g;
 932        u32 flags = 0, fdb_max;
 933        void *match_criteria;
 934        u8 *dmac;
 935
 936        esw_debug(esw->dev, "Create offloads FDB Tables\n");
 937        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 938        if (!flow_group_in)
 939                return -ENOMEM;
 940
 941        root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
 942        if (!root_ns) {
 943                esw_warn(dev, "Failed to get FDB flow namespace\n");
 944                err = -EOPNOTSUPP;
 945                goto ns_err;
 946        }
 947
 948        max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
 949                            MLX5_CAP_GEN(dev, max_flow_counter_15_0);
 950        fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
 951
 952        esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n",
 953                  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
 954                  max_flow_counter, ESW_OFFLOADS_NUM_GROUPS,
 955                  fdb_max);
 956
 957        for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++)
 958                esw->fdb_table.offloads.fdb_left[i] =
 959                        ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
 960
 961        table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
 962                MLX5_ESW_MISS_FLOWS + esw->total_vports;
 963
 964        /* create the slow path fdb with encap set, so further table instances
 965         * can be created at run time while VFs are probed if the FW allows that.
 966         */
 967        if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
 968                flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
 969                          MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 970
 971        ft_attr.flags = flags;
 972        ft_attr.max_fte = table_size;
 973        ft_attr.prio = FDB_SLOW_PATH;
 974
 975        fdb = mlx5_create_flow_table(root_ns, &ft_attr);
 976        if (IS_ERR(fdb)) {
 977                err = PTR_ERR(fdb);
 978                esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
 979                goto slow_fdb_err;
 980        }
 981        esw->fdb_table.offloads.slow_fdb = fdb;
 982
 983        /* If lazy creation isn't supported, open the fast path tables now */
 984        if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
 985            esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
 986                esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
 987                esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n");
 988                esw_get_prio_table(esw, 0, 1, 0);
 989                esw_get_prio_table(esw, 0, 1, 1);
 990        } else {
 991                esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n");
 992                esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
 993        }
 994
 995        /* create send-to-vport group */
 996        memset(flow_group_in, 0, inlen);
 997        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
 998                 MLX5_MATCH_MISC_PARAMETERS);
 999
1000        match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
1001
1002        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
1003        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
1004
1005        ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
1006        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1007        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
1008
1009        g = mlx5_create_flow_group(fdb, flow_group_in);
1010        if (IS_ERR(g)) {
1011                err = PTR_ERR(g);
1012                esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
1013                goto send_vport_err;
1014        }
1015        esw->fdb_table.offloads.send_to_vport_grp = g;
1016
1017        /* create peer esw miss group */
1018        memset(flow_group_in, 0, inlen);
1019        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1020                 MLX5_MATCH_MISC_PARAMETERS);
1021
1022        match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1023                                      match_criteria);
1024
1025        MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1026                         misc_parameters.source_port);
1027        MLX5_SET_TO_ONES(fte_match_param, match_criteria,
1028                         misc_parameters.source_eswitch_owner_vhca_id);
1029
1030        MLX5_SET(create_flow_group_in, flow_group_in,
1031                 source_eswitch_owner_vhca_id_valid, 1);
1032        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
1033        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
1034                 ix + esw->total_vports - 1);
1035        ix += esw->total_vports;
1036
1037        g = mlx5_create_flow_group(fdb, flow_group_in);
1038        if (IS_ERR(g)) {
1039                err = PTR_ERR(g);
1040                esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
1041                goto peer_miss_err;
1042        }
1043        esw->fdb_table.offloads.peer_miss_grp = g;
1044
1045        /* create miss group */
1046        memset(flow_group_in, 0, inlen);
1047        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1048                 MLX5_MATCH_OUTER_HEADERS);
1049        match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1050                                      match_criteria);
1051        dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
1052                            outer_headers.dmac_47_16);
1053        dmac[0] = 0x01;
1054
1055        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
1056        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
1057                 ix + MLX5_ESW_MISS_FLOWS);
1058
1059        g = mlx5_create_flow_group(fdb, flow_group_in);
1060        if (IS_ERR(g)) {
1061                err = PTR_ERR(g);
1062                esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
1063                goto miss_err;
1064        }
1065        esw->fdb_table.offloads.miss_grp = g;
1066
1067        err = esw_add_fdb_miss_rule(esw);
1068        if (err)
1069                goto miss_rule_err;
1070
1071        esw->nvports = nvports;
1072        kvfree(flow_group_in);
1073        return 0;
1074
1075miss_rule_err:
1076        mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
1077miss_err:
1078        mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
1079peer_miss_err:
1080        mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
1081send_vport_err:
1082        esw_destroy_offloads_fast_fdb_tables(esw);
1083        mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
1084slow_fdb_err:
1085ns_err:
1086        kvfree(flow_group_in);
1087        return err;
1088}
1089
1090static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
1091{
1092        if (!esw->fdb_table.offloads.slow_fdb)
1093                return;
1094
1095        esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
1096        mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
1097        mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
1098        mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
1099        mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
1100        mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
1101
1102        mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
1103        esw_destroy_offloads_fast_fdb_tables(esw);
1104}
1105
1106static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
1107{
1108        struct mlx5_flow_table_attr ft_attr = {};
1109        struct mlx5_core_dev *dev = esw->dev;
1110        struct mlx5_flow_table *ft_offloads;
1111        struct mlx5_flow_namespace *ns;
1112        int err = 0;
1113
1114        ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
1115        if (!ns) {
1116                esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
1117                return -EOPNOTSUPP;
1118        }
1119
1120        ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
1121
1122        ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
1123        if (IS_ERR(ft_offloads)) {
1124                err = PTR_ERR(ft_offloads);
1125                esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
1126                return err;
1127        }
1128
1129        esw->offloads.ft_offloads = ft_offloads;
1130        return 0;
1131}
1132
1133static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
1134{
1135        struct mlx5_esw_offload *offloads = &esw->offloads;
1136
1137        mlx5_destroy_flow_table(offloads->ft_offloads);
1138}
1139
1140static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
1141{
1142        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1143        struct mlx5_flow_group *g;
1144        u32 *flow_group_in;
1145        void *match_criteria, *misc;
1146        int err = 0;
1147
1148        nvports = nvports + MLX5_ESW_MISS_FLOWS;
1149        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1150        if (!flow_group_in)
1151                return -ENOMEM;
1152
1153        /* create vport rx group */
1154        memset(flow_group_in, 0, inlen);
1155        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1156                 MLX5_MATCH_MISC_PARAMETERS);
1157
1158        match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
1159        misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
1160        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1161
1162        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1163        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
1164
1165        g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
1166
1167        if (IS_ERR(g)) {
1168                err = PTR_ERR(g);
1169                mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
1170                goto out;
1171        }
1172
1173        esw->offloads.vport_rx_group = g;
1174out:
1175        kvfree(flow_group_in);
1176        return err;
1177}
1178
1179static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
1180{
1181        mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
1182}
1183
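/* Add a rule in the offloads (NIC RX) table steering packets whose e-switch
 * source port matches 'vport' to the given destination, typically the
 * representor's receive queues.
 */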
1184struct mlx5_flow_handle *
1185mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
1186                                  struct mlx5_flow_destination *dest)
1187{
1188        struct mlx5_flow_act flow_act = {0};
1189        struct mlx5_flow_handle *flow_rule;
1190        struct mlx5_flow_spec *spec;
1191        void *misc;
1192
1193        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1194        if (!spec) {
1195                flow_rule = ERR_PTR(-ENOMEM);
1196                goto out;
1197        }
1198
1199        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
1200        MLX5_SET(fte_match_set_misc, misc, source_port, vport);
1201
1202        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
1203        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1204
1205        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
1206
1207        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1208        flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
1209                                        &flow_act, dest, 1);
1210        if (IS_ERR(flow_rule)) {
1211                esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
1212                goto out;
1213        }
1214
1215out:
1216        kvfree(spec);
1217        return flow_rule;
1218}
1219
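/* devlink mode change to switchdev: disable the current (legacy) e-switch,
 * re-enable it in offloads mode (falling back to legacy on failure), and
 * resolve the vport inline mode if it is still unset.
 */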
1220static int esw_offloads_start(struct mlx5_eswitch *esw,
1221                              struct netlink_ext_ack *extack)
1222{
1223        int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
1224
1225        if (esw->mode != SRIOV_LEGACY &&
1226            !mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1227                NL_SET_ERR_MSG_MOD(extack,
1228                                   "Can't set offloads mode, SRIOV legacy not enabled");
1229                return -EINVAL;
1230        }
1231
1232        mlx5_eswitch_disable_sriov(esw);
1233        err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
1234        if (err) {
1235                NL_SET_ERR_MSG_MOD(extack,
1236                                   "Failed setting eswitch to offloads");
1237                err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
1238                if (err1) {
1239                        NL_SET_ERR_MSG_MOD(extack,
1240                                           "Failed setting eswitch back to legacy");
1241                }
1242        }
1243        if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
1244                if (mlx5_eswitch_inline_mode_get(esw,
1245                                                 num_vfs,
1246                                                 &esw->offloads.inline_mode)) {
1247                        esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
1248                        NL_SET_ERR_MSG_MOD(extack,
1249                                           "Inline mode is different between vports");
1250                }
1251        }
1252        return err;
1253}
1254
1255void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
1256{
1257        kfree(esw->offloads.vport_reps);
1258}
1259
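/* Allocate one representor entry per vport, record the NIC port MAC as the
 * shared hw_id and start every rep type in the REP_UNREGISTERED state.
 */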
1260int esw_offloads_init_reps(struct mlx5_eswitch *esw)
1261{
1262        int total_vports = MLX5_TOTAL_VPORTS(esw->dev);
1263        struct mlx5_core_dev *dev = esw->dev;
1264        struct mlx5_eswitch_rep *rep;
1265        u8 hw_id[ETH_ALEN], rep_type;
1266        int vport;
1267
1268        esw->offloads.vport_reps = kcalloc(total_vports,
1269                                           sizeof(struct mlx5_eswitch_rep),
1270                                           GFP_KERNEL);
1271        if (!esw->offloads.vport_reps)
1272                return -ENOMEM;
1273
1274        mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
1275
1276        mlx5_esw_for_all_reps(esw, vport, rep) {
1277                rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport);
1278                ether_addr_copy(rep->hw_id, hw_id);
1279
1280                for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
1281                        atomic_set(&rep->rep_if[rep_type].state,
1282                                   REP_UNREGISTERED);
1283        }
1284
1285        return 0;
1286}
1287
1288static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
1289                                      struct mlx5_eswitch_rep *rep, u8 rep_type)
1290{
1291        if (atomic_cmpxchg(&rep->rep_if[rep_type].state,
1292                           REP_LOADED, REP_REGISTERED) == REP_LOADED)
1293                rep->rep_if[rep_type].unload(rep);
1294}
1295
1296static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
1297{
1298        struct mlx5_eswitch_rep *rep;
1299
1300        if (mlx5_ecpf_vport_exists(esw->dev)) {
1301                rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
1302                __esw_offloads_unload_rep(esw, rep, rep_type);
1303        }
1304
1305        if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1306                rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
1307                __esw_offloads_unload_rep(esw, rep, rep_type);
1308        }
1309
1310        rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
1311        __esw_offloads_unload_rep(esw, rep, rep_type);
1312}
1313
1314static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
1315                                   u8 rep_type)
1316{
1317        struct mlx5_eswitch_rep *rep;
1318        int i;
1319
1320        mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports)
1321                __esw_offloads_unload_rep(esw, rep, rep_type);
1322}
1323
1324static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
1325{
1326        u8 rep_type = NUM_REP_TYPES;
1327
1328        while (rep_type-- > 0)
1329                __unload_reps_vf_vport(esw, nvports, rep_type);
1330}
1331
1332static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
1333                                    u8 rep_type)
1334{
1335        __unload_reps_vf_vport(esw, nvports, rep_type);
1336
1337        /* Special vports must be the last to unload. */
1338        __unload_reps_special_vport(esw, rep_type);
1339}
1340
1341static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports)
1342{
1343        u8 rep_type = NUM_REP_TYPES;
1344
1345        while (rep_type-- > 0)
1346                __unload_reps_all_vport(esw, nvports, rep_type);
1347}
1348
1349static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
1350                                   struct mlx5_eswitch_rep *rep, u8 rep_type)
1351{
1352        int err = 0;
1353
1354        if (atomic_cmpxchg(&rep->rep_if[rep_type].state,
1355                           REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
1356                err = rep->rep_if[rep_type].load(esw->dev, rep);
1357                if (err)
1358                        atomic_set(&rep->rep_if[rep_type].state,
1359                                   REP_REGISTERED);
1360        }
1361
1362        return err;
1363}
1364
1365static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
1366{
1367        struct mlx5_eswitch_rep *rep;
1368        int err;
1369
1370        rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
1371        err = __esw_offloads_load_rep(esw, rep, rep_type);
1372        if (err)
1373                return err;
1374
1375        if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1376                rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
1377                err = __esw_offloads_load_rep(esw, rep, rep_type);
1378                if (err)
1379                        goto err_pf;
1380        }
1381
1382        if (mlx5_ecpf_vport_exists(esw->dev)) {
1383                rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
1384                err = __esw_offloads_load_rep(esw, rep, rep_type);
1385                if (err)
1386                        goto err_ecpf;
1387        }
1388
1389        return 0;
1390
1391err_ecpf:
1392        if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1393                rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
1394                __esw_offloads_unload_rep(esw, rep, rep_type);
1395        }
1396
1397err_pf:
1398        rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
1399        __esw_offloads_unload_rep(esw, rep, rep_type);
1400        return err;
1401}
1402
1403static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
1404                                u8 rep_type)
1405{
1406        struct mlx5_eswitch_rep *rep;
1407        int err, i;
1408
1409        mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) {
1410                err = __esw_offloads_load_rep(esw, rep, rep_type);
1411                if (err)
1412                        goto err_vf;
1413        }
1414
1415        return 0;
1416
1417err_vf:
1418        __unload_reps_vf_vport(esw, --i, rep_type);
1419        return err;
1420}
1421
1422static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
1423{
1424        u8 rep_type = 0;
1425        int err;
1426
1427        for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
1428                err = __load_reps_vf_vport(esw, nvports, rep_type);
1429                if (err)
1430                        goto err_reps;
1431        }
1432
1433        return err;
1434
1435err_reps:
1436        while (rep_type-- > 0)
1437                __unload_reps_vf_vport(esw, nvports, rep_type);
1438        return err;
1439}
1440
1441static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
1442                                 u8 rep_type)
1443{
1444        int err;
1445
1446        /* Special vports must be loaded first. */
1447        err = __load_reps_special_vport(esw, rep_type);
1448        if (err)
1449                return err;
1450
1451        err = __load_reps_vf_vport(esw, nvports, rep_type);
1452        if (err)
1453                goto err_vfs;
1454
1455        return 0;
1456
1457err_vfs:
1458        __unload_reps_special_vport(esw, rep_type);
1459        return err;
1460}
1461
1462static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
1463{
1464        u8 rep_type = 0;
1465        int err;
1466
1467        for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
1468                err = __load_reps_all_vport(esw, nvports, rep_type);
1469                if (err)
1470                        goto err_reps;
1471        }
1472
1473        return err;
1474
1475err_reps:
1476        while (rep_type-- > 0)
1477                __unload_reps_all_vport(esw, nvports, rep_type);
1478        return err;
1479}
1480
1481#define ESW_OFFLOADS_DEVCOM_PAIR        (0)
1482#define ESW_OFFLOADS_DEVCOM_UNPAIR      (1)
1483
1484static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
1485                                  struct mlx5_eswitch *peer_esw)
1486{
1487        int err;
1488
1489        err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
1490        if (err)
1491                return err;
1492
1493        return 0;
1494}
1495
1496static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
1497{
1498        mlx5e_tc_clean_fdb_peer_flows(esw);
1499        esw_del_fdb_peer_miss_rules(esw);
1500}
1501
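/* devcom event handler used when two eswitches can be merged.  A PAIR
 * event installs peer miss rules in both directions and marks the devcom
 * component as paired; an UNPAIR event tears that pairing down.  If the
 * second direction of a PAIR fails, the first direction is rolled back
 * before the error is reported.
 */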
1502static int mlx5_esw_offloads_devcom_event(int event,
1503                                          void *my_data,
1504                                          void *event_data)
1505{
1506        struct mlx5_eswitch *esw = my_data;
1507        struct mlx5_eswitch *peer_esw = event_data;
1508        struct mlx5_devcom *devcom = esw->dev->priv.devcom;
1509        int err;
1510
1511        switch (event) {
1512        case ESW_OFFLOADS_DEVCOM_PAIR:
1513                err = mlx5_esw_offloads_pair(esw, peer_esw);
1514                if (err)
1515                        goto err_out;
1516
1517                err = mlx5_esw_offloads_pair(peer_esw, esw);
1518                if (err)
1519                        goto err_pair;
1520
1521                mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
1522                break;
1523
1524        case ESW_OFFLOADS_DEVCOM_UNPAIR:
1525                if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
1526                        break;
1527
1528                mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
1529                mlx5_esw_offloads_unpair(peer_esw);
1530                mlx5_esw_offloads_unpair(esw);
1531                break;
1532        }
1533
1534        return 0;
1535
1536err_pair:
1537        mlx5_esw_offloads_unpair(esw);
1538
1539err_out:
1540        mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d\n",
1541                      event, err);
1542        return err;
1543}
1544
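/* Initialize peer-flow bookkeeping and, when the merged_eswitch capability
 * is present, register for ESW_OFFLOADS devcom events and announce this
 * eswitch to a potential peer with a PAIR event.
 */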
1545static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
1546{
1547        struct mlx5_devcom *devcom = esw->dev->priv.devcom;
1548
1549        INIT_LIST_HEAD(&esw->offloads.peer_flows);
1550        mutex_init(&esw->offloads.peer_mutex);
1551
1552        if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
1553                return;
1554
1555        mlx5_devcom_register_component(devcom,
1556                                       MLX5_DEVCOM_ESW_OFFLOADS,
1557                                       mlx5_esw_offloads_devcom_event,
1558                                       esw);
1559
1560        mlx5_devcom_send_event(devcom,
1561                               MLX5_DEVCOM_ESW_OFFLOADS,
1562                               ESW_OFFLOADS_DEVCOM_PAIR, esw);
1563}
1564
1565static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
1566{
1567        struct mlx5_devcom *devcom = esw->dev->priv.devcom;
1568
1569        if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
1570                return;
1571
1572        mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
1573                               ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
1574
1575        mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1576}
1577
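/* In prio tag mode, install a single ingress ACL rule per VF vport: match
 * untagged packets and push a zero-VID 802.1Q header (the prio tag) before
 * allowing them.  Unmatched traffic is allowed unmodified.
 */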
1578static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
1579                                             struct mlx5_vport *vport)
1580{
1581        struct mlx5_core_dev *dev = esw->dev;
1582        struct mlx5_flow_act flow_act = {0};
1583        struct mlx5_flow_spec *spec;
1584        int err = 0;
1585
1586        /* For prio tag mode, there is only one FTE:
1587         * 1) Untagged packets - push prio tag VLAN, allow.
1588         * Unmatched traffic is allowed by default.
1589         */
1590
1591        if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
1592                return -EOPNOTSUPP;
1593
1594        esw_vport_cleanup_ingress_rules(esw, vport);
1595
1596        err = esw_vport_enable_ingress_acl(esw, vport);
1597        if (err) {
1598                mlx5_core_warn(esw->dev,
1599                               "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
1600                               err, vport->vport);
1601                return err;
1602        }
1603
1604        esw_debug(esw->dev,
1605                  "vport[%d] configure ingress rules\n", vport->vport);
1606
1607        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1608        if (!spec) {
1609                err = -ENOMEM;
1610                goto out_no_mem;
1611        }
1612
1613        /* Untagged packets - push prio tag VLAN, allow */
1614        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
1615        MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
1616        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1617        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
1618                          MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1619        flow_act.vlan[0].ethtype = ETH_P_8021Q;
1620        flow_act.vlan[0].vid = 0;
1621        flow_act.vlan[0].prio = 0;
1622        vport->ingress.allow_rule =
1623                mlx5_add_flow_rules(vport->ingress.acl, spec,
1624                                    &flow_act, NULL, 0);
1625        if (IS_ERR(vport->ingress.allow_rule)) {
1626                err = PTR_ERR(vport->ingress.allow_rule);
1627                esw_warn(esw->dev,
1628                         "vport[%d] configure ingress untagged allow rule, err(%d)\n",
1629                         vport->vport, err);
1630                vport->ingress.allow_rule = NULL;
1631                goto out;
1632        }
1633
1634out:
1635        kvfree(spec);
1636out_no_mem:
1637        if (err)
1638                esw_vport_cleanup_ingress_rules(esw, vport);
1639        return err;
1640}
1641
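/* Egress counterpart of the ingress prio tag rule: match packets carrying
 * a VLAN tag with VID 0 and pop it, so the VF receives the frame untagged.
 * Unmatched traffic is allowed unmodified.
 */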
1642static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
1643                                            struct mlx5_vport *vport)
1644{
1645        struct mlx5_flow_act flow_act = {0};
1646        struct mlx5_flow_spec *spec;
1647        int err = 0;
1648
1649        /* For prio tag mode, there is only one FTE:
1650         * 1) Prio tag packets - pop the prio tag VLAN, allow.
1651         * Unmatched traffic is allowed by default.
1652         */
1653
1654        esw_vport_cleanup_egress_rules(esw, vport);
1655
1656        err = esw_vport_enable_egress_acl(esw, vport);
1657        if (err) {
1658                mlx5_core_warn(esw->dev,
1659                               "failed to enable egress acl (%d) on vport[%d]\n",
1660                               err, vport->vport);
1661                return err;
1662        }
1663
1664        esw_debug(esw->dev,
1665                  "vport[%d] configure prio tag egress rules\n", vport->vport);
1666
1667        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1668        if (!spec) {
1669                err = -ENOMEM;
1670                goto out_no_mem;
1671        }
1672
1673        /* Prio tag VLAN rule - pop it so the VF receives untagged packets */
1674        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
1675        MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
1676        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
1677        MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, 0);
1678
1679        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1680        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
1681                          MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1682        vport->egress.allowed_vlan =
1683                mlx5_add_flow_rules(vport->egress.acl, spec,
1684                                    &flow_act, NULL, 0);
1685        if (IS_ERR(vport->egress.allowed_vlan)) {
1686                err = PTR_ERR(vport->egress.allowed_vlan);
1687                esw_warn(esw->dev,
1688                         "vport[%d] configure egress pop prio tag vlan rule failed, err(%d)\n",
1689                         vport->vport, err);
1690                vport->egress.allowed_vlan = NULL;
1691                goto out;
1692        }
1693
1694out:
1695        kvfree(spec);
1696out_no_mem:
1697        if (err)
1698                esw_vport_cleanup_egress_rules(esw, vport);
1699        return err;
1700}
1701
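/* Apply the ingress and egress prio tag ACLs to every VF vport, undoing
 * the ACLs configured so far if any vport fails.
 */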
1702static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
1703{
1704        struct mlx5_vport *vport = NULL;
1705        int i, j;
1706        int err;
1707
1708        mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
1709                err = esw_vport_ingress_prio_tag_config(esw, vport);
1710                if (err)
1711                        goto err_ingress;
1712                err = esw_vport_egress_prio_tag_config(esw, vport);
1713                if (err)
1714                        goto err_egress;
1715        }
1716
1717        return 0;
1718
1719err_egress:
1720        esw_vport_disable_ingress_acl(esw, vport);
1721err_ingress:
1722        mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
1723                esw_vport_disable_egress_acl(esw, vport);
1724                esw_vport_disable_ingress_acl(esw, vport);
1725        }
1726
1727        return err;
1728}
1729
1730static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
1731{
1732        struct mlx5_vport *vport;
1733        int i;
1734
1735        mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) {
1736                esw_vport_disable_egress_acl(esw, vport);
1737                esw_vport_disable_ingress_acl(esw, vport);
1738        }
1739}
1740
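/* Create the offloads steering objects in dependency order: optional prio
 * tag ACLs, the offloads FDB tables, then the offloads table and its vport
 * RX flow group used to steer traffic to representors.  Errors unwind in
 * reverse creation order.
 */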
1741static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
1742                                      int nvports)
1743{
1744        int err;
1745
1746        memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
1747        mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
1748
1749        if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
1750                err = esw_prio_tag_acls_config(esw, vf_nvports);
1751                if (err)
1752                        return err;
1753        }
1754
1755        err = esw_create_offloads_fdb_tables(esw, nvports);
1756        if (err)
1757                return err;
1758
1759        err = esw_create_offloads_table(esw, nvports);
1760        if (err)
1761                goto create_ft_err;
1762
1763        err = esw_create_vport_rx_group(esw, nvports);
1764        if (err)
1765                goto create_fg_err;
1766
1767        return 0;
1768
1769create_fg_err:
1770        esw_destroy_offloads_table(esw);
1771
1772create_ft_err:
1773        esw_destroy_offloads_fdb_tables(esw);
1774
1775        return err;
1776}
1777
1778static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
1779{
1780        esw_destroy_vport_rx_group(esw);
1781        esw_destroy_offloads_table(esw);
1782        esw_destroy_offloads_fdb_tables(esw);
1783        if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
1784                esw_prio_tag_acls_cleanup(esw);
1785}
1786
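/* Worker for HOST_PARAMS_CHANGE events on the ECPF eswitch manager:
 * re-query the number of host VFs and load or unload VF representors
 * accordingly.  The VF count is expected to change only between zero and
 * a non-zero value.
 */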
1787static void esw_host_params_event_handler(struct work_struct *work)
1788{
1789        struct mlx5_host_work *host_work;
1790        struct mlx5_eswitch *esw;
1791        int err, num_vf = 0;
1792
1793        host_work = container_of(work, struct mlx5_host_work, work);
1794        esw = host_work->esw;
1795
1796        err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf);
1797        if (err || num_vf == esw->host_info.num_vfs)
1798                goto out;
1799
1800        /* Number of VFs can only change from "0 to x" or "x to 0". */
1801        if (esw->host_info.num_vfs > 0) {
1802                esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs);
1803        } else {
1804                err = esw_offloads_load_vf_reps(esw, num_vf);
1805
1806                if (err)
1807                        goto out;
1808        }
1809
1810        esw->host_info.num_vfs = num_vf;
1811
1812out:
1813        kfree(host_work);
1814}
1815
1816static int esw_host_params_event(struct notifier_block *nb,
1817                                 unsigned long type, void *data)
1818{
1819        struct mlx5_host_work *host_work;
1820        struct mlx5_host_info *host_info;
1821        struct mlx5_eswitch *esw;
1822
1823        host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
1824        if (!host_work)
1825                return NOTIFY_DONE;
1826
1827        host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb);
1828        esw = container_of(host_info, struct mlx5_eswitch, host_info);
1829
1830        host_work->esw = esw;
1831
1832        INIT_WORK(&host_work->work, esw_host_params_event_handler);
1833        queue_work(esw->work_queue, &host_work->work);
1834
1835        return NOTIFY_OK;
1836}
1837
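/* Bring the eswitch into offloads mode: initialize steering, load all
 * representors, set up devcom pairing, register (on the ECPF eswitch
 * manager) for host VF count changes, and enable RoCE.
 */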
1838int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
1839                      int total_nvports)
1840{
1841        int err;
1842
1843        err = esw_offloads_steering_init(esw, vf_nvports, total_nvports);
1844        if (err)
1845                return err;
1846
1847        err = esw_offloads_load_all_reps(esw, vf_nvports);
1848        if (err)
1849                goto err_reps;
1850
1851        esw_offloads_devcom_init(esw);
1852
1853        if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1854                MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event,
1855                             HOST_PARAMS_CHANGE);
1856                mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb);
1857                esw->host_info.num_vfs = vf_nvports;
1858        }
1859
1860        mlx5_rdma_enable_roce(esw->dev);
1861
1862        return 0;
1863
1864err_reps:
1865        esw_offloads_steering_cleanup(esw);
1866        return err;
1867}
1868
1869static int esw_offloads_stop(struct mlx5_eswitch *esw,
1870                             struct netlink_ext_ack *extack)
1871{
1872        int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
1873
1874        mlx5_eswitch_disable_sriov(esw);
1875        err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
1876        if (err) {
1877                NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
1878                err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
1879                if (err1) {
1880                        NL_SET_ERR_MSG_MOD(extack,
1881                                           "Failed setting eswitch back to offloads");
1882                }
1883        }
1884
1885        return err;
1886}
1887
1888void esw_offloads_cleanup(struct mlx5_eswitch *esw)
1889{
1890        u16 num_vfs;
1891
1892        if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
1893                mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb);
1894                flush_workqueue(esw->work_queue);
1895                num_vfs = esw->host_info.num_vfs;
1896        } else {
1897                num_vfs = esw->dev->priv.sriov.num_vfs;
1898        }
1899
1900        mlx5_rdma_disable_roce(esw->dev);
1901        esw_offloads_devcom_cleanup(esw);
1902        esw_offloads_unload_all_reps(esw, num_vfs);
1903        esw_offloads_steering_cleanup(esw);
1904}
1905
1906static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
1907{
1908        switch (mode) {
1909        case DEVLINK_ESWITCH_MODE_LEGACY:
1910                *mlx5_mode = SRIOV_LEGACY;
1911                break;
1912        case DEVLINK_ESWITCH_MODE_SWITCHDEV:
1913                *mlx5_mode = SRIOV_OFFLOADS;
1914                break;
1915        default:
1916                return -EINVAL;
1917        }
1918
1919        return 0;
1920}
1921
1922static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
1923{
1924        switch (mlx5_mode) {
1925        case SRIOV_LEGACY:
1926                *mode = DEVLINK_ESWITCH_MODE_LEGACY;
1927                break;
1928        case SRIOV_OFFLOADS:
1929                *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
1930                break;
1931        default:
1932                return -EINVAL;
1933        }
1934
1935        return 0;
1936}
1937
1938static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
1939{
1940        switch (mode) {
1941        case DEVLINK_ESWITCH_INLINE_MODE_NONE:
1942                *mlx5_mode = MLX5_INLINE_MODE_NONE;
1943                break;
1944        case DEVLINK_ESWITCH_INLINE_MODE_LINK:
1945                *mlx5_mode = MLX5_INLINE_MODE_L2;
1946                break;
1947        case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
1948                *mlx5_mode = MLX5_INLINE_MODE_IP;
1949                break;
1950        case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
1951                *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
1952                break;
1953        default:
1954                return -EINVAL;
1955        }
1956
1957        return 0;
1958}
1959
1960static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
1961{
1962        switch (mlx5_mode) {
1963        case MLX5_INLINE_MODE_NONE:
1964                *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
1965                break;
1966        case MLX5_INLINE_MODE_L2:
1967                *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
1968                break;
1969        case MLX5_INLINE_MODE_IP:
1970                *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
1971                break;
1972        case MLX5_INLINE_MODE_TCP_UDP:
1973                *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
1974                break;
1975        default:
1976                return -EINVAL;
1977        }
1978
1979        return 0;
1980}
1981
1982static int mlx5_devlink_eswitch_check(struct devlink *devlink)
1983{
1984        struct mlx5_core_dev *dev = devlink_priv(devlink);
1985
1986        if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
1987                return -EOPNOTSUPP;
1988
1989        if (!MLX5_ESWITCH_MANAGER(dev))
1990                return -EPERM;
1991
1992        if (dev->priv.eswitch->mode == SRIOV_NONE &&
1993            !mlx5_core_is_ecpf_esw_manager(dev))
1994                return -EOPNOTSUPP;
1995
1996        return 0;
1997}
1998
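/* devlink callback behind "devlink dev eswitch set <dev> mode ...", e.g.
 * "devlink dev eswitch set pci/<BDF> mode switchdev".  Switching to
 * switchdev starts offloads mode, switching to legacy stops it, and
 * requesting the currently active mode is a no-op.
 */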
1999int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
2000                                  struct netlink_ext_ack *extack)
2001{
2002        struct mlx5_core_dev *dev = devlink_priv(devlink);
2003        u16 cur_mlx5_mode, mlx5_mode = 0;
2004        int err;
2005
2006        err = mlx5_devlink_eswitch_check(devlink);
2007        if (err)
2008                return err;
2009
2010        cur_mlx5_mode = dev->priv.eswitch->mode;
2011
2012        if (esw_mode_from_devlink(mode, &mlx5_mode))
2013                return -EINVAL;
2014
2015        if (cur_mlx5_mode == mlx5_mode)
2016                return 0;
2017
2018        if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
2019                return esw_offloads_start(dev->priv.eswitch, extack);
2020        else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
2021                return esw_offloads_stop(dev->priv.eswitch, extack);
2022        else
2023                return -EINVAL;
2024}
2025
2026int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
2027{
2028        struct mlx5_core_dev *dev = devlink_priv(devlink);
2029        int err;
2030
2031        err = mlx5_devlink_eswitch_check(devlink);
2032        if (err)
2033                return err;
2034
2035        return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
2036}
2037
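/* devlink callback for the minimum WQE inline mode.  Changing it is only
 * supported when the device takes the inline mode from the vport context
 * and no offloaded flows are installed; the new mode is applied to every
 * enabled vport and rolled back if any vport fails.
 */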
2038int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
2039                                         struct netlink_ext_ack *extack)
2040{
2041        struct mlx5_core_dev *dev = devlink_priv(devlink);
2042        struct mlx5_eswitch *esw = dev->priv.eswitch;
2043        int err, vport;
2044        u8 mlx5_mode;
2045
2046        err = mlx5_devlink_eswitch_check(devlink);
2047        if (err)
2048                return err;
2049
2050        switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
2051        case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2052                if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
2053                        return 0;
2054                /* fall through */
2055        case MLX5_CAP_INLINE_MODE_L2:
2056                NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
2057                return -EOPNOTSUPP;
2058        case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2059                break;
2060        }
2061
2062        if (esw->offloads.num_flows > 0) {
2063                NL_SET_ERR_MSG_MOD(extack,
2064                                   "Can't set inline mode when flows are configured");
2065                return -EOPNOTSUPP;
2066        }
2067
2068        err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
2069        if (err)
2070                goto out;
2071
2072        for (vport = 1; vport < esw->enabled_vports; vport++) {
2073                err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
2074                if (err) {
2075                        NL_SET_ERR_MSG_MOD(extack,
2076                                           "Failed to set min inline on vport");
2077                        goto revert_inline_mode;
2078                }
2079        }
2080
2081        esw->offloads.inline_mode = mlx5_mode;
2082        return 0;
2083
2084revert_inline_mode:
2085        while (--vport > 0)
2086                mlx5_modify_nic_vport_min_inline(dev,
2087                                                 vport,
2088                                                 esw->offloads.inline_mode);
2089out:
2090        return err;
2091}
2092
2093int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
2094{
2095        struct mlx5_core_dev *dev = devlink_priv(devlink);
2096        struct mlx5_eswitch *esw = dev->priv.eswitch;
2097        int err;
2098
2099        err = mlx5_devlink_eswitch_check(devlink);
2100        if (err)
2101                return err;
2102
2103        return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
2104}
2105
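/* Derive a single inline mode for nvfs VFs: fixed device-wide modes come
 * straight from the capability, while the vport-context case requires all
 * VF vports to report the same minimum inline mode.
 */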
2106int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
2107{
2108        u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
2109        struct mlx5_core_dev *dev = esw->dev;
2110        int vport;
2111
2112        if (!MLX5_CAP_GEN(dev, vport_group_manager))
2113                return -EOPNOTSUPP;
2114
2115        if (esw->mode == SRIOV_NONE)
2116                return -EOPNOTSUPP;
2117
2118        switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
2119        case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2120                mlx5_mode = MLX5_INLINE_MODE_NONE;
2121                goto out;
2122        case MLX5_CAP_INLINE_MODE_L2:
2123                mlx5_mode = MLX5_INLINE_MODE_L2;
2124                goto out;
2125        case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2126                goto query_vports;
2127        }
2128
2129query_vports:
2130        for (vport = 1; vport <= nvfs; vport++) {
2131                mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
2132                if (vport > 1 && prev_mlx5_mode != mlx5_mode)
2133                        return -EINVAL;
2134                prev_mlx5_mode = mlx5_mode;
2135        }
2136
2137out:
2138        *mode = mlx5_mode;
2139        return 0;
2140}
2141
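/* devlink callback for the eswitch encapsulation setting.  Basic encap
 * requires FDB reformat and decap support, and cannot be changed while
 * offloaded flows exist.  In offloads mode the FDB tables are re-created
 * so the new setting takes effect; on failure the previous setting is
 * restored.
 */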
2142int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
2143                                        struct netlink_ext_ack *extack)
2144{
2145        struct mlx5_core_dev *dev = devlink_priv(devlink);
2146        struct mlx5_eswitch *esw = dev->priv.eswitch;
2147        int err;
2148
2149        err = mlx5_devlink_eswitch_check(devlink);
2150        if (err)
2151                return err;
2152
2153        if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
2154            (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
2155             !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
2156                return -EOPNOTSUPP;
2157
2158        if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
2159                return -EOPNOTSUPP;
2160
2161        if (esw->mode == SRIOV_LEGACY) {
2162                esw->offloads.encap = encap;
2163                return 0;
2164        }
2165
2166        if (esw->offloads.encap == encap)
2167                return 0;
2168
2169        if (esw->offloads.num_flows > 0) {
2170                NL_SET_ERR_MSG_MOD(extack,
2171                                   "Can't set encapsulation when flows are configured");
2172                return -EOPNOTSUPP;
2173        }
2174
2175        esw_destroy_offloads_fdb_tables(esw);
2176
2177        esw->offloads.encap = encap;
2178
2179        err = esw_create_offloads_fdb_tables(esw, esw->nvports);
2180
2181        if (err) {
2182                NL_SET_ERR_MSG_MOD(extack,
2183                                   "Failed re-creating fast FDB table");
2184                esw->offloads.encap = !encap;
2185                (void)esw_create_offloads_fdb_tables(esw, esw->nvports);
2186        }
2187
2188        return err;
2189}
2190
2191int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
2192{
2193        struct mlx5_core_dev *dev = devlink_priv(devlink);
2194        struct mlx5_eswitch *esw = dev->priv.eswitch;
2195        int err;
2196
2197        err = mlx5_devlink_eswitch_check(devlink);
2198        if (err)
2199                return err;
2200
2201        *encap = esw->offloads.encap;
2202        return 0;
2203}
2204
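/* Registration API used by representor consumers.  A minimal usage sketch,
 * assuming hypothetical my_rep_load()/my_rep_unload()/my_rep_get_netdev()
 * callbacks and the REP_ETH representor type:
 *
 *	struct mlx5_eswitch_rep_if rep_if = {
 *		.load		= my_rep_load,
 *		.unload		= my_rep_unload,
 *		.get_proto_dev	= my_rep_get_netdev,
 *		.priv		= my_priv,
 *	};
 *
 *	mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH);
 *
 * Every representor of that type moves to REP_REGISTERED; the load()
 * callback only runs once offloads mode loads the rep.
 */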
2205void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
2206                                      struct mlx5_eswitch_rep_if *__rep_if,
2207                                      u8 rep_type)
2208{
2209        struct mlx5_eswitch_rep_if *rep_if;
2210        struct mlx5_eswitch_rep *rep;
2211        int i;
2212
2213        mlx5_esw_for_all_reps(esw, i, rep) {
2214                rep_if = &rep->rep_if[rep_type];
2215                rep_if->load   = __rep_if->load;
2216                rep_if->unload = __rep_if->unload;
2217                rep_if->get_proto_dev = __rep_if->get_proto_dev;
2218                rep_if->priv = __rep_if->priv;
2219
2220                atomic_set(&rep_if->state, REP_REGISTERED);
2221        }
2222}
2223EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
2224
2225void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
2226{
2227        u16 max_vf = mlx5_core_max_vfs(esw->dev);
2228        struct mlx5_eswitch_rep *rep;
2229        int i;
2230
2231        if (esw->mode == SRIOV_OFFLOADS)
2232                __unload_reps_all_vport(esw, max_vf, rep_type);
2233
2234        mlx5_esw_for_all_reps(esw, i, rep)
2235                atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED);
2236}
2237EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
2238
2239void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
2240{
2241        struct mlx5_eswitch_rep *rep;
2242
2243        rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
2244        return rep->rep_if[rep_type].priv;
2245}
2246
2247void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
2248                                 u16 vport,
2249                                 u8 rep_type)
2250{
2251        struct mlx5_eswitch_rep *rep;
2252
2253        rep = mlx5_eswitch_get_rep(esw, vport);
2254
2255        if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED &&
2256            rep->rep_if[rep_type].get_proto_dev)
2257                return rep->rep_if[rep_type].get_proto_dev(rep);
2258        return NULL;
2259}
2260EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
2261
2262void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
2263{
2264        return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
2265}
2266EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
2267
2268struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
2269                                                u16 vport)
2270{
2271        return mlx5_eswitch_get_rep(esw, vport);
2272}
2273EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
2274