1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36#include <linux/mutex.h>
37#include <linux/inetdevice.h>
38#include <linux/slab.h>
39#include <linux/workqueue.h>
40#include <linux/module.h>
41#include <net/arp.h>
42#include <net/neighbour.h>
43#include <net/route.h>
44#include <net/netevent.h>
45#include <net/addrconf.h>
46#include <net/ip6_route.h>
47#include <rdma/ib_addr.h>
48
49MODULE_AUTHOR("Sean Hefty");
50MODULE_DESCRIPTION("IB Address Translation");
51MODULE_LICENSE("Dual BSD/GPL");
52
53struct addr_req {
54 struct list_head list;
55 struct sockaddr_storage src_addr;
56 struct sockaddr_storage dst_addr;
57 struct rdma_dev_addr *addr;
58 struct rdma_addr_client *client;
59 void *context;
60 void (*callback)(int status, struct sockaddr *src_addr,
61 struct rdma_dev_addr *addr, void *context);
62 unsigned long timeout;
63 int status;
64};
65
/* Work handler that retries/completes the requests on req_list. */
static void process_req(struct work_struct *work);

/* Pending requests; every access in this file is made with 'lock' held. */
static LIST_HEAD(req_list);
static DEFINE_MUTEX(lock);

/* Delayed work running process_req(), (re)armed by set_timeout(). */
static DECLARE_DELAYED_WORK(work, process_req);
/* Workqueue the delayed work is queued on; created in addr_init(). */
static struct workqueue_struct *addr_wq;
72
73void rdma_addr_register_client(struct rdma_addr_client *client)
74{
75 atomic_set(&client->refcount, 1);
76 init_completion(&client->comp);
77}
78EXPORT_SYMBOL(rdma_addr_register_client);
79
80static inline void put_client(struct rdma_addr_client *client)
81{
82 if (atomic_dec_and_test(&client->refcount))
83 complete(&client->comp);
84}
85
86void rdma_addr_unregister_client(struct rdma_addr_client *client)
87{
88 put_client(client);
89 wait_for_completion(&client->comp);
90}
91EXPORT_SYMBOL(rdma_addr_unregister_client);
92
93int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
94 const unsigned char *dst_dev_addr)
95{
96 dev_addr->dev_type = dev->type;
97 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
98 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
99 if (dst_dev_addr)
100 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
101 dev_addr->bound_dev_if = dev->ifindex;
102 return 0;
103}
104EXPORT_SYMBOL(rdma_copy_addr);
105
106int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
107{
108 struct net_device *dev;
109 int ret = -EADDRNOTAVAIL;
110
111 if (dev_addr->bound_dev_if) {
112 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
113 if (!dev)
114 return -ENODEV;
115 ret = rdma_copy_addr(dev_addr, dev, NULL);
116 dev_put(dev);
117 return ret;
118 }
119
120 switch (addr->sa_family) {
121 case AF_INET:
122 dev = ip_dev_find(&init_net,
123 ((struct sockaddr_in *) addr)->sin_addr.s_addr);
124
125 if (!dev)
126 return ret;
127
128 ret = rdma_copy_addr(dev_addr, dev, NULL);
129 dev_put(dev);
130 break;
131
132#if IS_ENABLED(CONFIG_IPV6)
133 case AF_INET6:
134 rcu_read_lock();
135 for_each_netdev_rcu(&init_net, dev) {
136 if (ipv6_chk_addr(&init_net,
137 &((struct sockaddr_in6 *) addr)->sin6_addr,
138 dev, 1)) {
139 ret = rdma_copy_addr(dev_addr, dev, NULL);
140 break;
141 }
142 }
143 rcu_read_unlock();
144 break;
145#endif
146 }
147 return ret;
148}
149EXPORT_SYMBOL(rdma_translate_ip);
150
151static void set_timeout(unsigned long time)
152{
153 unsigned long delay;
154
155 delay = time - jiffies;
156 if ((long)delay <= 0)
157 delay = 1;
158
159 mod_delayed_work(addr_wq, &work, delay);
160}
161
162static void queue_req(struct addr_req *req)
163{
164 struct addr_req *temp_req;
165
166 mutex_lock(&lock);
167 list_for_each_entry_reverse(temp_req, &req_list, list) {
168 if (time_after_eq(req->timeout, temp_req->timeout))
169 break;
170 }
171
172 list_add(&req->list, &temp_req->list);
173
174 if (req_list.next == &req->list)
175 set_timeout(req->timeout);
176 mutex_unlock(&lock);
177}
178
179static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
180{
181 struct neighbour *n;
182 int ret;
183
184 n = dst_neigh_lookup(dst, daddr);
185
186 rcu_read_lock();
187 if (!n || !(n->nud_state & NUD_VALID)) {
188 if (n)
189 neigh_event_send(n, NULL);
190 ret = -ENODATA;
191 } else {
192 ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
193 }
194 rcu_read_unlock();
195
196 if (n)
197 neigh_release(n);
198
199 return ret;
200}
201
202static int addr4_resolve(struct sockaddr_in *src_in,
203 struct sockaddr_in *dst_in,
204 struct rdma_dev_addr *addr)
205{
206 __be32 src_ip = src_in->sin_addr.s_addr;
207 __be32 dst_ip = dst_in->sin_addr.s_addr;
208 struct rtable *rt;
209 struct flowi4 fl4;
210 int ret;
211
212 memset(&fl4, 0, sizeof(fl4));
213 fl4.daddr = dst_ip;
214 fl4.saddr = src_ip;
215 fl4.flowi4_oif = addr->bound_dev_if;
216 rt = ip_route_output_key(&init_net, &fl4);
217 if (IS_ERR(rt)) {
218 ret = PTR_ERR(rt);
219 goto out;
220 }
221 src_in->sin_family = AF_INET;
222 src_in->sin_addr.s_addr = fl4.saddr;
223
224 if (rt->dst.dev->flags & IFF_LOOPBACK) {
225 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
226 if (!ret)
227 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
228 goto put;
229 }
230
231
232 if (rt->dst.dev->flags & IFF_NOARP) {
233 ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
234 goto put;
235 }
236
237 ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
238put:
239 ip_rt_put(rt);
240out:
241 return ret;
242}
243
244#if IS_ENABLED(CONFIG_IPV6)
245static int addr6_resolve(struct sockaddr_in6 *src_in,
246 struct sockaddr_in6 *dst_in,
247 struct rdma_dev_addr *addr)
248{
249 struct flowi6 fl6;
250 struct dst_entry *dst;
251 int ret;
252
253 memset(&fl6, 0, sizeof fl6);
254 fl6.daddr = dst_in->sin6_addr;
255 fl6.saddr = src_in->sin6_addr;
256 fl6.flowi6_oif = addr->bound_dev_if;
257
258 dst = ip6_route_output(&init_net, NULL, &fl6);
259 if ((ret = dst->error))
260 goto put;
261
262 if (ipv6_addr_any(&fl6.saddr)) {
263 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
264 &fl6.daddr, 0, &fl6.saddr);
265 if (ret)
266 goto put;
267
268 src_in->sin6_family = AF_INET6;
269 src_in->sin6_addr = fl6.saddr;
270 }
271
272 if (dst->dev->flags & IFF_LOOPBACK) {
273 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
274 if (!ret)
275 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
276 goto put;
277 }
278
279
280 if (dst->dev->flags & IFF_NOARP) {
281 ret = rdma_copy_addr(addr, dst->dev, NULL);
282 goto put;
283 }
284
285 ret = dst_fetch_ha(dst, addr, &fl6.daddr);
286put:
287 dst_release(dst);
288 return ret;
289}
290#else
291static int addr6_resolve(struct sockaddr_in6 *src_in,
292 struct sockaddr_in6 *dst_in,
293 struct rdma_dev_addr *addr)
294{
295 return -EADDRNOTAVAIL;
296}
297#endif
298
299static int addr_resolve(struct sockaddr *src_in,
300 struct sockaddr *dst_in,
301 struct rdma_dev_addr *addr)
302{
303 if (src_in->sa_family == AF_INET) {
304 return addr4_resolve((struct sockaddr_in *) src_in,
305 (struct sockaddr_in *) dst_in, addr);
306 } else
307 return addr6_resolve((struct sockaddr_in6 *) src_in,
308 (struct sockaddr_in6 *) dst_in, addr);
309}
310
311static void process_req(struct work_struct *work)
312{
313 struct addr_req *req, *temp_req;
314 struct sockaddr *src_in, *dst_in;
315 struct list_head done_list;
316
317 INIT_LIST_HEAD(&done_list);
318
319 mutex_lock(&lock);
320 list_for_each_entry_safe(req, temp_req, &req_list, list) {
321 if (req->status == -ENODATA) {
322 src_in = (struct sockaddr *) &req->src_addr;
323 dst_in = (struct sockaddr *) &req->dst_addr;
324 req->status = addr_resolve(src_in, dst_in, req->addr);
325 if (req->status && time_after_eq(jiffies, req->timeout))
326 req->status = -ETIMEDOUT;
327 else if (req->status == -ENODATA)
328 continue;
329 }
330 list_move_tail(&req->list, &done_list);
331 }
332
333 if (!list_empty(&req_list)) {
334 req = list_entry(req_list.next, struct addr_req, list);
335 set_timeout(req->timeout);
336 }
337 mutex_unlock(&lock);
338
339 list_for_each_entry_safe(req, temp_req, &done_list, list) {
340 list_del(&req->list);
341 req->callback(req->status, (struct sockaddr *) &req->src_addr,
342 req->addr, req->context);
343 put_client(req->client);
344 kfree(req);
345 }
346}
347
348int rdma_resolve_ip(struct rdma_addr_client *client,
349 struct sockaddr *src_addr, struct sockaddr *dst_addr,
350 struct rdma_dev_addr *addr, int timeout_ms,
351 void (*callback)(int status, struct sockaddr *src_addr,
352 struct rdma_dev_addr *addr, void *context),
353 void *context)
354{
355 struct sockaddr *src_in, *dst_in;
356 struct addr_req *req;
357 int ret = 0;
358
359 req = kzalloc(sizeof *req, GFP_KERNEL);
360 if (!req)
361 return -ENOMEM;
362
363 src_in = (struct sockaddr *) &req->src_addr;
364 dst_in = (struct sockaddr *) &req->dst_addr;
365
366 if (src_addr) {
367 if (src_addr->sa_family != dst_addr->sa_family) {
368 ret = -EINVAL;
369 goto err;
370 }
371
372 memcpy(src_in, src_addr, ip_addr_size(src_addr));
373 } else {
374 src_in->sa_family = dst_addr->sa_family;
375 }
376
377 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
378 req->addr = addr;
379 req->callback = callback;
380 req->context = context;
381 req->client = client;
382 atomic_inc(&client->refcount);
383
384 req->status = addr_resolve(src_in, dst_in, addr);
385 switch (req->status) {
386 case 0:
387 req->timeout = jiffies;
388 queue_req(req);
389 break;
390 case -ENODATA:
391 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
392 queue_req(req);
393 break;
394 default:
395 ret = req->status;
396 atomic_dec(&client->refcount);
397 goto err;
398 }
399 return ret;
400err:
401 kfree(req);
402 return ret;
403}
404EXPORT_SYMBOL(rdma_resolve_ip);
405
406void rdma_addr_cancel(struct rdma_dev_addr *addr)
407{
408 struct addr_req *req, *temp_req;
409
410 mutex_lock(&lock);
411 list_for_each_entry_safe(req, temp_req, &req_list, list) {
412 if (req->addr == addr) {
413 req->status = -ECANCELED;
414 req->timeout = jiffies;
415 list_move(&req->list, &req_list);
416 set_timeout(req->timeout);
417 break;
418 }
419 }
420 mutex_unlock(&lock);
421}
422EXPORT_SYMBOL(rdma_addr_cancel);
423
424static int netevent_callback(struct notifier_block *self, unsigned long event,
425 void *ctx)
426{
427 if (event == NETEVENT_NEIGH_UPDATE) {
428 struct neighbour *neigh = ctx;
429
430 if (neigh->nud_state & NUD_VALID) {
431 set_timeout(jiffies);
432 }
433 }
434 return 0;
435}
436
437static struct notifier_block nb = {
438 .notifier_call = netevent_callback
439};
440
441static int __init addr_init(void)
442{
443 addr_wq = create_singlethread_workqueue("ib_addr");
444 if (!addr_wq)
445 return -ENOMEM;
446
447 register_netevent_notifier(&nb);
448 return 0;
449}
450
451static void __exit addr_cleanup(void)
452{
453 unregister_netevent_notifier(&nb);
454 destroy_workqueue(addr_wq);
455}
456
/* Module entry and exit points. */
module_init(addr_init);
module_exit(addr_cleanup);
459