1
2
3
4#include <linux/netdevice.h>
5#include <net/nexthop.h>
6#include "lag.h"
7#include "lag_mp.h"
8#include "mlx5_core.h"
9#include "eswitch.h"
10#include "lib/mlx5.h"
11
12static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
13{
14 return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
15}
16
17static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
18{
19 if (!mlx5_lag_is_ready(ldev))
20 return false;
21
22 if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
23 return false;
24
25 return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
26 ldev->pf[MLX5_LAG_P2].dev);
27}
28
29bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
30{
31 struct mlx5_lag *ldev;
32 bool res;
33
34 ldev = mlx5_lag_dev(dev);
35 res = ldev && __mlx5_lag_is_multipath(ldev);
36
37 return res;
38}
39
40
41
42
43
44
45
46
47
48
49
50static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
51 enum mlx5_lag_port_affinity port)
52{
53 struct lag_tracker tracker;
54
55 if (!__mlx5_lag_is_multipath(ldev))
56 return;
57
58 switch (port) {
59 case MLX5_LAG_NORMAL_AFFINITY:
60 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
61 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
62 tracker.netdev_state[MLX5_LAG_P1].link_up = true;
63 tracker.netdev_state[MLX5_LAG_P2].link_up = true;
64 break;
65 case MLX5_LAG_P1_AFFINITY:
66 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
67 tracker.netdev_state[MLX5_LAG_P1].link_up = true;
68 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
69 tracker.netdev_state[MLX5_LAG_P2].link_up = false;
70 break;
71 case MLX5_LAG_P2_AFFINITY:
72 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
73 tracker.netdev_state[MLX5_LAG_P1].link_up = false;
74 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
75 tracker.netdev_state[MLX5_LAG_P2].link_up = true;
76 break;
77 default:
78 mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
79 "Invalid affinity port %d", port);
80 return;
81 }
82
83 if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
84 mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
85 MLX5_DEV_EVENT_PORT_AFFINITY,
86 (void *)0);
87
88 if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
89 mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
90 MLX5_DEV_EVENT_PORT_AFFINITY,
91 (void *)0);
92
93 mlx5_modify_lag(ldev, &tracker);
94}
95
96static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
97{
98 struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
99
100 flush_workqueue(mp->wq);
101}
102
/* Deferred FIB event: queued from atomic notifier context and processed
 * on the multipath workqueue (mlx5_lag_fib_update()).
 */
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;	/* FIB_EVENT_* code being deferred */
	union {			/* payload; which member is valid depends on event */
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};
112
/* Handle a route replace/delete event for a fib_info that may be carried
 * by the multipath LAG. A two-nexthop route spanning both PF netdevs
 * activates the LAG (if needed) and is cached in mp->mfi; a single-path
 * route pins affinity to the backing port; a delete of the cached route
 * drops the cache. Runs under RTNL (from the workqueue handler).
 */
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event: stop tracking the route if it is the one
	 * we cached.
	 */
	if (event == FIB_EVENT_ENTRY_DEL) {
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		/* Single-path route: if the LAG is active, steer traffic to
		 * the port backing the nexthop device, or restore normal
		 * affinity when the device is not one of our netdevs.
		 */
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				i = MLX5_LAG_NORMAL_AFFINITY;
			else
				/* netdev idx is 0-based; affinity ports are 1-based */
				++i;

			mlx5_lag_set_port_affinity(ldev, i);
		}
		return;
	}

	if (nhs != 2)
		return;

	/* Verify the two next hops are ports of the same HCA, in either order. */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload require two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route: activate the LAG in
	 * multipath mode using the currently tracked link state.
	 */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}
173
/* React to a nexthop add/delete on the currently cached multipath route:
 * losing one nexthop pins affinity to the surviving port; regaining the
 * second nexthop restores normal (both-port) affinity. Runs under RTNL.
 */
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Ignore nh events unrelated to the route we are tracking. */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			/* Map the deleted port's 0-based netdev idx to the
			 * OTHER port's 1-based affinity value (0 -> 2, 1 -> 1).
			 */
			i = (i + 1) % 2 + 1;
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}
198
/* Workqueue handler: dispatch a deferred FIB event under RTNL, then drop
 * the fib_info reference that was taken in mlx5_lag_fib_event() when the
 * work item was queued, and free the work item.
 */
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		/* release the hold taken at queue time */
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		/* release the hold on the parent fib_info taken at queue time */
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}
229
230static struct mlx5_fib_event_work *
231mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
232{
233 struct mlx5_fib_event_work *fib_work;
234
235 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
236 if (WARN_ON(!fib_work))
237 return NULL;
238
239 INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
240 fib_work->ldev = ldev;
241 fib_work->event = event;
242
243 return fib_work;
244}
245
/* FIB notifier callback: filter IPv4 route/nexthop events relevant to
 * this LAG pair and defer them to the multipath workqueue. Runs in
 * atomic notifier context, so all real processing happens later in
 * mlx5_lag_fib_update().
 */
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		/* Routes built on nexthop objects are not supported here. */
		if (fi->nh) {
			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
			return notifier_from_errno(-EINVAL);
		}
		/* Ignore routes whose first nexthop is not one of our netdevs. */
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take a reference on the fib_info so it cannot be freed
		 * while the work item is pending; released in
		 * mlx5_lag_fib_update().
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		/* Pin the parent fib_info until the work item runs. */
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}
307
/* Drop the cached multipath fib_info pointer so it is not compared
 * against future route events after it may have gone stale.
 * NOTE(review): presumably called while FIB event processing is
 * quiesced or under an appropriate lock — confirm at the call sites.
 */
void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	ldev->lag_mp.mfi = NULL;
}
315
316int mlx5_lag_mp_init(struct mlx5_lag *ldev)
317{
318 struct lag_mp *mp = &ldev->lag_mp;
319 int err;
320
321
322
323
324 mp->mfi = NULL;
325
326 if (mp->fib_nb.notifier_call)
327 return 0;
328
329 mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
330 if (!mp->wq)
331 return -ENOMEM;
332
333 mp->fib_nb.notifier_call = mlx5_lag_fib_event;
334 err = register_fib_notifier(&init_net, &mp->fib_nb,
335 mlx5_lag_fib_event_flush, NULL);
336 if (err) {
337 destroy_workqueue(mp->wq);
338 mp->fib_nb.notifier_call = NULL;
339 }
340
341 return err;
342}
343
344void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
345{
346 struct lag_mp *mp = &ldev->lag_mp;
347
348 if (!mp->fib_nb.notifier_call)
349 return;
350
351 unregister_fib_notifier(&init_net, &mp->fib_nb);
352 destroy_workqueue(mp->wq);
353 mp->fib_nb.notifier_call = NULL;
354 mp->mfi = NULL;
355}
356