1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <linux/netdevice.h>
34#include <linux/mlx5/driver.h>
35#include <linux/mlx5/eswitch.h>
36#include <linux/mlx5/vport.h>
37#include "lib/devcom.h"
38#include "mlx5_core.h"
39#include "eswitch.h"
40#include "lag.h"
41#include "lag_mp.h"
42
43
44
45
46
47static DEFINE_SPINLOCK(lag_lock);
48
49static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
50 u8 remap_port2, bool shared_fdb)
51{
52 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
53 void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
54
55 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
56
57 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
58 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
59 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
60
61 return mlx5_cmd_exec_in(dev, create_lag, in);
62}
63
64static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
65 u8 remap_port2)
66{
67 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
68 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
69
70 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
71 MLX5_SET(modify_lag_in, in, field_select, 0x1);
72
73 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
74 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
75
76 return mlx5_cmd_exec_in(dev, modify_lag, in);
77}
78
79int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
80{
81 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
82
83 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
84
85 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
86}
87EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
88
89int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
90{
91 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
92
93 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
94
95 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
96}
97EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
98
99static int mlx5_lag_netdev_event(struct notifier_block *this,
100 unsigned long event, void *ptr);
101static void mlx5_do_bond_work(struct work_struct *work);
102
/* kref release callback: tear down a LAG device object.
 * Ordering matters: the netdev notifier (which can queue bond work) is
 * unregistered first, then pending bond work is flushed, and only then
 * is the workqueue destroyed and the object freed.
 * nb.notifier_call is used as a "registration succeeded" flag set by
 * mlx5_lag_dev_alloc().
 */
static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}
114
/* Drop a reference on @ldev; the last put frees it via mlx5_ldev_free(). */
static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}
119
/* Take an additional reference on @ldev (caller must already hold one). */
static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}
124
/* Allocate and initialize a LAG device object shared by the two PFs.
 * Returns the new object with refcount 1, or NULL if the basic
 * allocations (object or workqueue) fail.
 *
 * Notifier registration and multipath init failures are deliberately
 * non-fatal: they are logged and the object is still returned, with
 * nb.notifier_call left NULL on notifier failure so mlx5_ldev_free()
 * knows not to unregister it.
 */
static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}
156
157int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
158 struct net_device *ndev)
159{
160 int i;
161
162 for (i = 0; i < MLX5_MAX_PORTS; i++)
163 if (ldev->pf[i].netdev == ndev)
164 return i;
165
166 return -ENOENT;
167}
168
169static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
170{
171 return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
172}
173
174static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
175{
176 return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
177}
178
179static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
180 u8 *port1, u8 *port2)
181{
182 bool p1en;
183 bool p2en;
184
185 p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
186 tracker->netdev_state[MLX5_LAG_P1].link_up;
187
188 p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
189 tracker->netdev_state[MLX5_LAG_P2].link_up;
190
191 *port1 = 1;
192 *port2 = 2;
193 if ((!p1en && !p2en) || (p1en && p2en))
194 return;
195
196 if (p1en)
197 *port2 = 1;
198 else
199 *port1 = 2;
200}
201
/* Recompute the tx affinity mapping from @tracker and, if it differs
 * from the cached v2p_map, push it to firmware with MODIFY_LAG.
 * The cache is updated before the command is issued; a firmware failure
 * is only logged, leaving the cache holding the intended mapping.
 */
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}
228
/* Create the firmware LAG on dev0 (the LAG master) using the affinity
 * mapping inferred from @tracker, optionally switching both eswitches
 * to single (shared) FDB mode.
 *
 * If shared-FDB configuration fails after CREATE_LAG succeeded, the LAG
 * is rolled back with DESTROY_LAG; a failure of the rollback itself is
 * unrecoverable and only logged. Returns 0 on success or a negative
 * errno.
 */
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		/* Point dev1's eswitch at dev0's FDB so both PFs share it. */
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	/* Roll back the firmware LAG if shared-FDB setup failed. */
	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}
272
/* Activate the LAG in the mode given by @flags (RoCE or SR-IOV),
 * optionally with a shared FDB. On success the mode flags and
 * shared_fdb state are recorded on @ldev. Returns 0 or a negative
 * errno from mlx5_create_lag().
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker, shared_fdb);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}
299
/* Deactivate the LAG: clear the mode flags, reset multipath state,
 * split a shared FDB back into per-eswitch FDBs, and finally issue
 * DESTROY_LAG to firmware. The RoCE/VF mode is sampled before the
 * flags are cleared so the right error message can be printed.
 * A DESTROY_LAG failure leaves the device in a state that requires a
 * driver restart; it is logged and the error returned.
 */
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
	mlx5_lag_mp_reset(ldev);

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}
331
/* Check whether LAG may be activated right now: both PF devices must be
 * present, and either the eswitch-specific prerequisites hold (eswitch
 * builds) or SR-IOV is disabled on both PFs (non-eswitch builds).
 */
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}
345
346static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
347{
348 int i;
349
350 for (i = 0; i < MLX5_MAX_PORTS; i++) {
351 if (!ldev->pf[i].dev)
352 continue;
353
354 if (ldev->pf[i].dev->priv.flags &
355 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
356 continue;
357
358 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
359 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
360 }
361}
362
363static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
364{
365 int i;
366
367 for (i = 0; i < MLX5_MAX_PORTS; i++) {
368 if (!ldev->pf[i].dev)
369 continue;
370
371 if (ldev->pf[i].dev->priv.flags &
372 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
373 continue;
374
375 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
376 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
377 }
378}
379
/* Tear the LAG down. Before deactivation, auxiliary devices are removed
 * (shared-FDB mode) or dev0's IB device is disabled and dev1's RoCE
 * turned off (RoCE mode) so nothing uses the LAG while it is destroyed.
 * If deactivation fails the unwinding is abandoned (driver restart is
 * required anyway). Afterwards the auxiliary devices are restored and,
 * for shared FDB, the eswitch representors are reloaded on both PFs.
 */
static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	/* Sample the mode before mlx5_deactivate_lag() clears the flags. */
	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}
414
415static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
416{
417 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
418 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
419
420 if (is_mdev_switchdev_mode(dev0) &&
421 is_mdev_switchdev_mode(dev1) &&
422 mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
423 mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
424 mlx5_devcom_is_paired(dev0->priv.devcom,
425 MLX5_DEVCOM_ESW_OFFLOADS) &&
426 MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
427 MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
428 MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
429 return true;
430
431 return false;
432}
433
/* Core LAG state machine, run from the bond workqueue with the device
 * list lock and both eswitch locks held. Decides, from the tracked bond
 * state, whether to activate, modify, or disable the hardware LAG.
 */
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* Multipath mode owns the LAG; don't fight it here. */
		if (mlx5_lag_is_multipath(dev0))
			return;

		/* Snapshot the tracker so we work on a consistent view. */
		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		/* RoCE LAG only when neither PF has SR-IOV (or, on eswitch
		 * builds, an eswitch mode) enabled; otherwise VF LAG.
		 */
		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		/* Remove auxiliary devices before activation so they are
		 * re-created on top of the bonded configuration.
		 */
		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
					MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			/* Representor reload failed: unwind the whole
			 * activation and restore the non-LAG configuration.
			 */
			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}
507
/* Schedule (or reschedule) the bond work on the LAG workqueue after @delay. */
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
512
/* Lock both PFs' eswitches, tolerating absent devices. Always locks
 * dev0 before dev1; mlx5_lag_unlock_eswitches() releases in reverse.
 */
static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}
521
/* Unlock both PFs' eswitches in the reverse order of
 * mlx5_lag_lock_eswitches() (dev1 first, then dev0).
 */
static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}
530
/* Workqueue handler for bond-state changes. Takes the device list lock
 * (non-blocking) and both eswitch locks around mlx5_do_bond(). If the
 * list lock is contended, or a mode change is in progress, the work is
 * requeued one second later instead of blocking the workqueue.
 */
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}
557
/* Handle NETDEV_CHANGEUPPER for a bond master above our netdevs.
 * Updates @tracker with the bond's tx type and bonded state.
 * Returns 1 when tracker->is_bonded changed (caller should requeue the
 * bond work), 0 otherwise.
 */
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* Walk the bond's slaves and record, as a bitmap, which of our
	 * two netdevs are enslaved, plus the total slave count.
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* Ignore bonds that contain none of our netdevs. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Offloadable only when the bond holds exactly our two netdevs
	 * and nothing else.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
		return 0;
	}

	/* Only active-backup and hash tx policies can be offloaded. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	if (is_in_lag && !mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}
626
/* Handle NETDEV_CHANGELOWERSTATE for one of our slave netdevs: copy the
 * new lower-state (link up / tx enabled) into the tracker slot for that
 * port. Returns 1 when the tracker was updated (caller requeues bond
 * work), 0 when the event is not relevant to this LAG.
 */
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	/* Only our own netdevs are tracked. */
	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}
653
/* Netdev notifier callback: react to CHANGEUPPER/CHANGELOWERSTATE events
 * by updating a local copy of the tracker, publishing it back to the
 * LAG device, and queueing the bond work when the tracked state changed.
 * Always returns NOTIFY_DONE (the event is never consumed).
 */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	/* Lower-state changes are meaningless until both ports are ready. */
	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
		return NOTIFY_DONE;

	/* Work on a snapshot; the handlers mutate it in place. */
	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}
690
691static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
692 struct mlx5_core_dev *dev,
693 struct net_device *netdev)
694{
695 unsigned int fn = PCI_FUNC(dev->pdev->devfn);
696
697 if (fn >= MLX5_MAX_PORTS)
698 return;
699
700 spin_lock(&lag_lock);
701 ldev->pf[fn].netdev = netdev;
702 ldev->tracker.netdev_state[fn].link_up = 0;
703 ldev->tracker.netdev_state[fn].tx_enabled = 0;
704 spin_unlock(&lag_lock);
705}
706
707static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
708 struct net_device *netdev)
709{
710 int i;
711
712 spin_lock(&lag_lock);
713 for (i = 0; i < MLX5_MAX_PORTS; i++) {
714 if (ldev->pf[i].netdev == netdev) {
715 ldev->pf[i].netdev = NULL;
716 break;
717 }
718 }
719 spin_unlock(&lag_lock);
720}
721
722static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
723 struct mlx5_core_dev *dev)
724{
725 unsigned int fn = PCI_FUNC(dev->pdev->devfn);
726
727 if (fn >= MLX5_MAX_PORTS)
728 return;
729
730 ldev->pf[fn].dev = dev;
731 dev->priv.lag = ldev;
732}
733
734
735static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
736 struct mlx5_core_dev *dev)
737{
738 int i;
739
740 for (i = 0; i < MLX5_MAX_PORTS; i++)
741 if (ldev->pf[i].dev == dev)
742 break;
743
744 if (i == MLX5_MAX_PORTS)
745 return;
746
747 ldev->pf[i].dev = NULL;
748 dev->priv.lag = NULL;
749}
750
751
/* Attach @dev to a LAG object, creating one if its peer hasn't yet.
 * Called with the device list lock held.
 *
 * Returns 0 when the device does not qualify for LAG, or when
 * allocation fails (LAG is then simply unavailable — deliberately not
 * an error for device probe). Returns -EAGAIN when an existing LAG
 * object is mid mode-change, so the caller retries.
 */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return 0;

	/* The peer PF (if already probed) owns the shared LAG object. */
	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}
782
/* Detach @dev from its LAG object and drop the reference taken at add
 * time. If a mode change is in flight, back off (the list lock cannot
 * be held across the change) and retry until it completes.
 */
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}
802
/* Attach @dev to its LAG object under the device list lock, retrying
 * while the object is busy with a mode change (-EAGAIN from
 * __mlx5_lag_dev_add_mdev()).
 */
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}
817
818
/* Forget @netdev on @dev's LAG object. Losing a netdev means the LAG
 * can no longer be (re)configured, so the READY flag is cleared; if the
 * LAG is currently active, bond work is queued to tear it down.
 */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}
834
835
/* Register @netdev on @dev's LAG object. Once both port slots have a
 * core device attached the LAG is marked READY, and the bond work is
 * queued to evaluate the new state.
 */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	/* READY only when no port slot is missing its core device. */
	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}
856
857bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
858{
859 struct mlx5_lag *ldev;
860 bool res;
861
862 spin_lock(&lag_lock);
863 ldev = mlx5_lag_dev(dev);
864 res = ldev && __mlx5_lag_is_roce(ldev);
865 spin_unlock(&lag_lock);
866
867 return res;
868}
869EXPORT_SYMBOL(mlx5_lag_is_roce);
870
871bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
872{
873 struct mlx5_lag *ldev;
874 bool res;
875
876 spin_lock(&lag_lock);
877 ldev = mlx5_lag_dev(dev);
878 res = ldev && __mlx5_lag_is_active(ldev);
879 spin_unlock(&lag_lock);
880
881 return res;
882}
883EXPORT_SYMBOL(mlx5_lag_is_active);
884
885bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
886{
887 struct mlx5_lag *ldev;
888 bool res;
889
890 spin_lock(&lag_lock);
891 ldev = mlx5_lag_dev(dev);
892 res = ldev && __mlx5_lag_is_active(ldev) &&
893 dev == ldev->pf[MLX5_LAG_P1].dev;
894 spin_unlock(&lag_lock);
895
896 return res;
897}
898EXPORT_SYMBOL(mlx5_lag_is_master);
899
900bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
901{
902 struct mlx5_lag *ldev;
903 bool res;
904
905 spin_lock(&lag_lock);
906 ldev = mlx5_lag_dev(dev);
907 res = ldev && __mlx5_lag_is_sriov(ldev);
908 spin_unlock(&lag_lock);
909
910 return res;
911}
912EXPORT_SYMBOL(mlx5_lag_is_sriov);
913
914bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
915{
916 struct mlx5_lag *ldev;
917 bool res;
918
919 spin_lock(&lag_lock);
920 ldev = mlx5_lag_dev(dev);
921 res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
922 spin_unlock(&lag_lock);
923
924 return res;
925}
926EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
927
/* Begin a mode change: bump mode_changes_in_progress (which makes the
 * bond work back off) and, if a LAG is active, tear it down under the
 * eswitch locks. Balanced by mlx5_lag_enable_change().
 */
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();

	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}
951
/* End a mode change started by mlx5_lag_disable_change(): decrement the
 * in-progress counter and queue the bond work to re-evaluate the LAG.
 */
void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}
965
/* Return the netdev carrying RoCE traffic for @dev's LAG, with a
 * reference held (caller must dev_put()), or NULL when the device is
 * not in a RoCE LAG. For active-backup bonds this is the tx-enabled
 * port's netdev; otherwise port 1's netdev is returned.
 */
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
993
/* Map slave netdev @slave to the physical port it currently transmits
 * on, via the cached v2p affinity map. Returns 0 when @dev is not part
 * of a RoCE LAG. NOTE(review): a netdev that belongs to neither port
 * slot falls through to the port-2 mapping — presumably callers only
 * pass genuine slaves; verify against callers.
 */
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1017
1018struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
1019{
1020 struct mlx5_core_dev *peer_dev = NULL;
1021 struct mlx5_lag *ldev;
1022
1023 spin_lock(&lag_lock);
1024 ldev = mlx5_lag_dev(dev);
1025 if (!ldev)
1026 goto unlock;
1027
1028 peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
1029 ldev->pf[MLX5_LAG_P2].dev :
1030 ldev->pf[MLX5_LAG_P1].dev;
1031
1032unlock:
1033 spin_unlock(&lag_lock);
1034 return peer_dev;
1035}
1036EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
1037
/* Query congestion statistics and accumulate them into @values at the
 * byte @offsets supplied by the caller (@num_counters entries each).
 * When the LAG is active, counters from both PFs are summed; otherwise
 * only @dev is queried. Returns 0 or a negative errno; @values is
 * zeroed up front so a partial failure leaves partial sums.
 *
 * NOTE(review): the mdev pointers are sampled under lag_lock but used
 * after it is dropped — presumably the caller guarantees the devices
 * outlive this call; verify against callers.
 */
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
1087