#include <linux/slab.h>
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/bitops.h>
#include <linux/export.h>

#include "rds.h"
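/*
 * RDS tracks congestion per IP address.  Each address gets a
 * struct rds_cong_map containing a bitmap with one bit per port,
 * spread over RDS_CONG_MAP_PAGES pages.  Maps are kept in a global
 * rb-tree keyed by address and are shared by every connection to or
 * from that address.
 *
 * A connection's local map (c_lcong) records which of our ports are
 * congested and is pushed to the peer whenever it changes; the foreign
 * map (c_fcong) is the peer's latest view of its own ports and is
 * consulted before we send.
 *
 * rds_cong_generation is bumped on every received update so that
 * pollers can cheaply detect that something, somewhere, changed.
 */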
static atomic_t rds_cong_generation = ATOMIC_INIT(0);
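/*
 * Sockets that enabled congestion monitoring (rs_cong_monitor) sit on
 * this list; rds_cong_map_updated() walks it to tell each socket which
 * of the ports it is watching changed state.
 */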
static LIST_HEAD(rds_cong_monitor);
static DEFINE_RWLOCK(rds_cong_monitor_lock);
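/*
 * The global tree of congestion maps, keyed by IP address.
 * rds_cong_lock protects the tree and the per-map connection lists.
 */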
static DEFINE_SPINLOCK(rds_cong_lock);
static struct rb_root rds_cong_tree = RB_ROOT;
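/*
 * Find the map for 'addr', or, if 'insert' is given and no map exists,
 * link 'insert' into the tree where the search ended.  The big-endian
 * addresses are compared directly; the ordering only has to be
 * consistent, not meaningful.  The caller must hold rds_cong_lock.
 */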
static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
					       struct rds_cong_map *insert)
{
	struct rb_node **p = &rds_cong_tree.rb_node;
	struct rb_node *parent = NULL;
	struct rds_cong_map *map;

	while (*p) {
		parent = *p;
		map = rb_entry(parent, struct rds_cong_map, m_rb_node);

		if (addr < map->m_addr)
			p = &(*p)->rb_left;
		else if (addr > map->m_addr)
			p = &(*p)->rb_right;
		else
			return map;
	}

	if (insert) {
		rb_link_node(&insert->m_rb_node, parent, p);
		rb_insert_color(&insert->m_rb_node, &rds_cong_tree);
	}
	return NULL;
}
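/*
 * Return the map for this address, allocating one and inserting it into
 * the tree if it doesn't exist yet.  The allocation is done outside
 * rds_cong_lock; if we lose the race and find a map already present,
 * the freshly allocated one is freed again.
 */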
static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
{
	struct rds_cong_map *map;
	struct rds_cong_map *ret = NULL;
	unsigned long zp;
	unsigned long i;
	unsigned long flags;

	map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
	if (!map)
		return NULL;

	map->m_addr = addr;
	init_waitqueue_head(&map->m_waitq);
	INIT_LIST_HEAD(&map->m_conn_list);

	for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
		zp = get_zeroed_page(GFP_KERNEL);
		if (zp == 0)
			goto out;
		map->m_page_addrs[i] = zp;
	}

	spin_lock_irqsave(&rds_cong_lock, flags);
	ret = rds_cong_tree_walk(addr, map);
	spin_unlock_irqrestore(&rds_cong_lock, flags);

	if (!ret) {
		ret = map;
		map = NULL;
	}

out:
	if (map) {
		for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
			free_page(map->m_page_addrs[i]);
		kfree(map);
	}

	rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));

	return ret;
}
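/*
 * Connections register themselves on their local map's list so that a
 * change to the map can be queued for sending on each of them.
 */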
void rds_cong_add_conn(struct rds_connection *conn)
{
	unsigned long flags;

	rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong);
	spin_lock_irqsave(&rds_cong_lock, flags);
	list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list);
	spin_unlock_irqrestore(&rds_cong_lock, flags);
}

void rds_cong_remove_conn(struct rds_connection *conn)
{
	unsigned long flags;

	rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong);
	spin_lock_irqsave(&rds_cong_lock, flags);
	list_del_init(&conn->c_map_item);
	spin_unlock_irqrestore(&rds_cong_lock, flags);
}

int rds_cong_get_maps(struct rds_connection *conn)
{
	conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
	conn->c_fcong = rds_cong_from_addr(conn->c_faddr);

	if (!(conn->c_lcong && conn->c_fcong))
		return -ENOMEM;

	return 0;
}
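/*
 * A bit in the local map changed: mark every connection sharing the map
 * and kick its send worker so the update goes out.  The c_map_queued
 * bit keeps a connection from being queued twice for the same update.
 */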
void rds_cong_queue_updates(struct rds_cong_map *map)
{
	struct rds_connection *conn;
	unsigned long flags;

	spin_lock_irqsave(&rds_cong_lock, flags);

	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
		if (!test_and_set_bit(0, &conn->c_map_queued)) {
			rds_stats_inc(s_cong_update_queued);
			/* Kick the send worker on the connection's first
			 * path; it will notice c_map_queued and transmit
			 * the updated map to the peer.
			 */
			queue_delayed_work(rds_wq,
					   &conn->c_path[0].cp_send_w, 0);
		}
	}

	spin_unlock_irqrestore(&rds_cong_lock, flags);
}
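/*
 * A congestion update arrived from the peer that owns this map.  Wake
 * anyone waiting to send, poke poll(), and tell monitoring sockets
 * which of the ports in 'portmask' they care about changed.
 */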
void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
{
	rdsdebug("waking map %p for %pI4\n", map, &map->m_addr);
	rds_stats_inc(s_cong_update_received);
	atomic_inc(&rds_cong_generation);
	if (waitqueue_active(&map->m_waitq))
		wake_up(&map->m_waitq);
	if (waitqueue_active(&rds_poll_waitq))
		wake_up_all(&rds_poll_waitq);

	if (portmask && !list_empty(&rds_cong_monitor)) {
		unsigned long flags;
		struct rds_sock *rs;

		read_lock_irqsave(&rds_cong_monitor_lock, flags);
		list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) {
			spin_lock(&rs->rs_lock);
			rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
			rs->rs_cong_mask &= ~portmask;
			spin_unlock(&rs->rs_lock);
			if (rs->rs_cong_notify)
				rds_wake_sk_sleep(rs);
		}
		read_unlock_irqrestore(&rds_cong_monitor_lock, flags);
	}
}
EXPORT_SYMBOL_GPL(rds_cong_map_updated);
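/*
 * Used by poll: returns 1 exactly once after any map has changed since
 * the caller's last recorded generation.
 */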
int rds_cong_updated_since(unsigned long *recent)
{
	unsigned long gen = atomic_read(&rds_cong_generation);

	if (likely(*recent == gen))
		return 0;
	*recent = gen;
	return 1;
}
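/*
 * A port's bit lives at bit (port % RDS_CONG_MAP_PAGE_BITS) of page
 * (port / RDS_CONG_MAP_PAGE_BITS).  The little-endian bit operations
 * keep the bitmap layout independent of host byte order, since the map
 * pages are sent to the peer as-is.
 */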
void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
{
	unsigned long i;
	unsigned long off;

	rdsdebug("setting congestion for %pI4:%u in map %p\n",
		 &map->m_addr, ntohs(port), map);

	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;

	set_bit_le(off, (void *)map->m_page_addrs[i]);
}

void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
{
	unsigned long i;
	unsigned long off;

	rdsdebug("clearing congestion for %pI4:%u in map %p\n",
		 &map->m_addr, ntohs(port), map);

	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;

	clear_bit_le(off, (void *)map->m_page_addrs[i]);
}

static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
{
	unsigned long i;
	unsigned long off;

	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;

	return test_bit_le(off, (void *)map->m_page_addrs[i]);
}
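/*
 * Sockets join the monitor list when they ask for congestion
 * notifications and leave it when they are closed.  Closing a socket
 * also uncongests its bound port, since nothing can overflow it now.
 */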
void rds_cong_add_socket(struct rds_sock *rs)
{
	unsigned long flags;

	write_lock_irqsave(&rds_cong_monitor_lock, flags);
	if (list_empty(&rs->rs_cong_list))
		list_add(&rs->rs_cong_list, &rds_cong_monitor);
	write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
}

void rds_cong_remove_socket(struct rds_sock *rs)
{
	unsigned long flags;
	struct rds_cong_map *map;

	write_lock_irqsave(&rds_cong_monitor_lock, flags);
	list_del_init(&rs->rs_cong_list);
	write_unlock_irqrestore(&rds_cong_monitor_lock, flags);

	/* The socket's bound port can no longer be congested: clear its
	 * bit in the local map and let peers know.
	 */
	spin_lock_irqsave(&rds_cong_lock, flags);
	map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
	spin_unlock_irqrestore(&rds_cong_lock, flags);

	if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
		rds_cong_clear_bit(map, rs->rs_bound_port);
		rds_cong_queue_updates(map);
	}
}
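/*
 * Called before sending to 'port' on this map.  Returns 0 if the port
 * is not congested.  A blocking sender sleeps until the bit clears; a
 * non-blocking sender gets -ENOBUFS, but a monitored socket first arms
 * its notification mask so it will hear when the port drains.
 */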
int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock,
		  struct rds_sock *rs)
{
	if (!rds_cong_test_bit(map, port))
		return 0;
	if (nonblock) {
		if (rs && rs->rs_cong_monitor) {
			unsigned long flags;

			/* Ask to be notified when this port drains. */
			spin_lock_irqsave(&rs->rs_lock, flags);
			rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port));
			spin_unlock_irqrestore(&rs->rs_lock, flags);

			/* Re-check after arming the mask: the bit may have
			 * cleared in the meantime, in which case no
			 * notification will arrive for it.
			 */
			if (!rds_cong_test_bit(map, port))
				return 0;
		}
		rds_stats_inc(s_cong_send_error);
		return -ENOBUFS;
	}

	rds_stats_inc(s_cong_send_blocked);
	rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port));

	return wait_event_interruptible(map->m_waitq,
					!rds_cong_test_bit(map, port));
}
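/*
 * Free every congestion map still in the tree, along with its bitmap
 * pages.
 */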
void rds_cong_exit(void)
{
	struct rb_node *node;
	struct rds_cong_map *map;
	unsigned long i;

	while ((node = rb_first(&rds_cong_tree))) {
		map = rb_entry(node, struct rds_cong_map, m_rb_node);
		rdsdebug("freeing map %p\n", map);
		rb_erase(&map->m_rb_node, &rds_cong_tree);
		for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
			free_page(map->m_page_addrs[i]);
		kfree(map);
	}
}
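/*
 * Build an update message for the peer: the local map's pages are
 * mapped straight into the message rather than copied, and the header
 * is flagged as a congestion bitmap.
 */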
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
{
	struct rds_cong_map *map = conn->c_lcong;
	struct rds_message *rm;

	rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
	if (!IS_ERR(rm))
		rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;

	return rm;
}