1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <linux/slab.h>
34#include <linux/types.h>
35#include <linux/rbtree.h>
36#include <linux/bitops.h>
37#include <linux/export.h>
38
39#include "rds.h"
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*
 * Bumped by rds_cong_map_updated() on every congestion-map change;
 * rds_cong_updated_since() compares a caller's saved snapshot against
 * it so pollers can tell whether anything changed since they last looked.
 */
static atomic_t rds_cong_generation = ATOMIC_INIT(0);

/*
 * List of sockets that asked for congestion-monitor notification.
 * Protected by rds_cong_monitor_lock: readers in rds_cong_map_updated(),
 * writers in rds_cong_add_socket()/rds_cong_remove_socket().
 */
static LIST_HEAD(rds_cong_monitor);
static DEFINE_RWLOCK(rds_cong_monitor_lock);

/*
 * rds_cong_lock protects the rbtree of per-address congestion maps
 * (rds_cong_tree) and each map's m_conn_list of connections.
 */
static DEFINE_SPINLOCK(rds_cong_lock);
static struct rb_root rds_cong_tree = RB_ROOT;
103
104static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
105 struct rds_cong_map *insert)
106{
107 struct rb_node **p = &rds_cong_tree.rb_node;
108 struct rb_node *parent = NULL;
109 struct rds_cong_map *map;
110
111 while (*p) {
112 parent = *p;
113 map = rb_entry(parent, struct rds_cong_map, m_rb_node);
114
115 if (addr < map->m_addr)
116 p = &(*p)->rb_left;
117 else if (addr > map->m_addr)
118 p = &(*p)->rb_right;
119 else
120 return map;
121 }
122
123 if (insert) {
124 rb_link_node(&insert->m_rb_node, parent, p);
125 rb_insert_color(&insert->m_rb_node, &rds_cong_tree);
126 }
127 return NULL;
128}
129
130
131
132
133
134
135static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
136{
137 struct rds_cong_map *map;
138 struct rds_cong_map *ret = NULL;
139 unsigned long zp;
140 unsigned long i;
141 unsigned long flags;
142
143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
144 if (!map)
145 return NULL;
146
147 map->m_addr = addr;
148 init_waitqueue_head(&map->m_waitq);
149 INIT_LIST_HEAD(&map->m_conn_list);
150
151 for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
152 zp = get_zeroed_page(GFP_KERNEL);
153 if (zp == 0)
154 goto out;
155 map->m_page_addrs[i] = zp;
156 }
157
158 spin_lock_irqsave(&rds_cong_lock, flags);
159 ret = rds_cong_tree_walk(addr, map);
160 spin_unlock_irqrestore(&rds_cong_lock, flags);
161
162 if (!ret) {
163 ret = map;
164 map = NULL;
165 }
166
167out:
168 if (map) {
169 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
170 free_page(map->m_page_addrs[i]);
171 kfree(map);
172 }
173
174 rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
175
176 return ret;
177}
178
179
180
181
182
183void rds_cong_add_conn(struct rds_connection *conn)
184{
185 unsigned long flags;
186
187 rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong);
188 spin_lock_irqsave(&rds_cong_lock, flags);
189 list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list);
190 spin_unlock_irqrestore(&rds_cong_lock, flags);
191}
192
193void rds_cong_remove_conn(struct rds_connection *conn)
194{
195 unsigned long flags;
196
197 rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong);
198 spin_lock_irqsave(&rds_cong_lock, flags);
199 list_del_init(&conn->c_map_item);
200 spin_unlock_irqrestore(&rds_cong_lock, flags);
201}
202
203int rds_cong_get_maps(struct rds_connection *conn)
204{
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207
208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM;
210
211 return 0;
212}
213
/*
 * Ask every connection using @map to send a congestion-map update to
 * its peer.  We only mark the connection (bit 0 of c_map_queued) and
 * queue its send worker here; the actual transmission happens from the
 * worker.
 */
void rds_cong_queue_updates(struct rds_cong_map *map)
{
	struct rds_connection *conn;
	unsigned long flags;

	spin_lock_irqsave(&rds_cong_lock, flags);

	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
		/* Congestion updates go out on path 0. */
		struct rds_conn_path *cp = &conn->c_path[0];

		/* The RCU read section covers the rds_destroy_pending()
		 * check so we don't queue work on a connection that is
		 * being torn down concurrently. */
		rcu_read_lock();
		if (!test_and_set_bit(0, &conn->c_map_queued) &&
		    !rds_destroy_pending(cp->cp_conn)) {
			rds_stats_inc(s_cong_update_queued);
			/* NOTE(review): the original upstream source
			 * carried a long comment here about the ordering
			 * between setting c_map_queued and the worker
			 * observing the map contents -- presumably the
			 * test_and_set_bit()/queue_delayed_work() pair
			 * provides the required barriers; confirm against
			 * the upstream history before relying on it. */
			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
		}
		rcu_read_unlock();
	}

	spin_unlock_irqrestore(&rds_cong_lock, flags);
}
249
/*
 * Called when the congestion map @map has been updated.  Bumps the
 * global generation counter (so rds_cong_updated_since() callers see a
 * change), wakes senders sleeping in rds_cong_wait() and pollers on
 * rds_poll_waitq, and notifies congestion-monitoring sockets whose
 * monitored ports intersect @portmask.
 */
void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
{
	rdsdebug("waking map %p for %pI4\n",
		 map, &map->m_addr);
	rds_stats_inc(s_cong_update_received);
	atomic_inc(&rds_cong_generation);
	if (waitqueue_active(&map->m_waitq))
		wake_up(&map->m_waitq);
	if (waitqueue_active(&rds_poll_waitq))
		wake_up_all(&rds_poll_waitq);

	if (portmask && !list_empty(&rds_cong_monitor)) {
		unsigned long flags;
		struct rds_sock *rs;

		/* Read lock: multiple updaters may walk the monitor list
		 * concurrently; per-socket state is serialized by rs_lock. */
		read_lock_irqsave(&rds_cong_monitor_lock, flags);
		list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) {
			spin_lock(&rs->rs_lock);
			/* Move the ports this socket cares about from the
			 * "armed" mask into the pending-notify mask. */
			rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
			rs->rs_cong_mask &= ~portmask;
			spin_unlock(&rs->rs_lock);
			if (rs->rs_cong_notify)
				rds_wake_sk_sleep(rs);
		}
		read_unlock_irqrestore(&rds_cong_monitor_lock, flags);
	}
}
EXPORT_SYMBOL_GPL(rds_cong_map_updated);
277EXPORT_SYMBOL_GPL(rds_cong_map_updated);
278
279int rds_cong_updated_since(unsigned long *recent)
280{
281 unsigned long gen = atomic_read(&rds_cong_generation);
282
283 if (likely(*recent == gen))
284 return 0;
285 *recent = gen;
286 return 1;
287}
288
289
290
291
292
293
294
295
296void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
297{
298 unsigned long i;
299 unsigned long off;
300
301 rdsdebug("setting congestion for %pI4:%u in map %p\n",
302 &map->m_addr, ntohs(port), map);
303
304 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
305 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
306
307 set_bit_le(off, (void *)map->m_page_addrs[i]);
308}
309
310void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
311{
312 unsigned long i;
313 unsigned long off;
314
315 rdsdebug("clearing congestion for %pI4:%u in map %p\n",
316 &map->m_addr, ntohs(port), map);
317
318 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
319 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
320
321 clear_bit_le(off, (void *)map->m_page_addrs[i]);
322}
323
324static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
325{
326 unsigned long i;
327 unsigned long off;
328
329 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
330 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
331
332 return test_bit_le(off, (void *)map->m_page_addrs[i]);
333}
334
335void rds_cong_add_socket(struct rds_sock *rs)
336{
337 unsigned long flags;
338
339 write_lock_irqsave(&rds_cong_monitor_lock, flags);
340 if (list_empty(&rs->rs_cong_list))
341 list_add(&rs->rs_cong_list, &rds_cong_monitor);
342 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
343}
344
345void rds_cong_remove_socket(struct rds_sock *rs)
346{
347 unsigned long flags;
348 struct rds_cong_map *map;
349
350 write_lock_irqsave(&rds_cong_monitor_lock, flags);
351 list_del_init(&rs->rs_cong_list);
352 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
353
354
355 spin_lock_irqsave(&rds_cong_lock, flags);
356 map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
357 spin_unlock_irqrestore(&rds_cong_lock, flags);
358
359 if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
360 rds_cong_clear_bit(map, rs->rs_bound_port);
361 rds_cong_queue_updates(map);
362 }
363}
364
/*
 * Gate a send on the congestion state of @port in @map.
 *
 * Returns 0 right away when the port is not congested.  Otherwise:
 * blocking sockets sleep (interruptibly) until the bit clears;
 * non-blocking sockets get -ENOBUFS, except that congestion-monitoring
 * sockets first arm their notification mask for the port and re-check,
 * so they either proceed or are guaranteed a wakeup later.
 */
int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock,
		  struct rds_sock *rs)
{
	if (!rds_cong_test_bit(map, port))
		return 0;
	if (nonblock) {
		if (rs && rs->rs_cong_monitor) {
			unsigned long flags;

			/* Record interest in this port so that
			 * rds_cong_map_updated() will move it into
			 * rs_cong_notify and wake us. */
			spin_lock_irqsave(&rs->rs_lock, flags);
			rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port));
			spin_unlock_irqrestore(&rs->rs_lock, flags);

			/* Re-test after publishing the mask: if the map
			 * was updated in between, the port may already be
			 * clear and the send can proceed.  NOTE(review):
			 * this relies on the unlock above ordering the
			 * mask store before the re-read -- confirm against
			 * the upstream comment. */
			if (!rds_cong_test_bit(map, port))
				return 0;
		}
		rds_stats_inc(s_cong_send_error);
		return -ENOBUFS;
	}

	rds_stats_inc(s_cong_send_blocked);
	rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port));

	return wait_event_interruptible(map->m_waitq,
					!rds_cong_test_bit(map, port));
}
395
396void rds_cong_exit(void)
397{
398 struct rb_node *node;
399 struct rds_cong_map *map;
400 unsigned long i;
401
402 while ((node = rb_first(&rds_cong_tree))) {
403 map = rb_entry(node, struct rds_cong_map, m_rb_node);
404 rdsdebug("freeing map %p\n", map);
405 rb_erase(&map->m_rb_node, &rds_cong_tree);
406 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
407 free_page(map->m_page_addrs[i]);
408 kfree(map);
409 }
410}
411
412
413
414
415struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
416{
417 struct rds_cong_map *map = conn->c_lcong;
418 struct rds_message *rm;
419
420 rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
421 if (!IS_ERR(rm))
422 rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;
423
424 return rm;
425}
426