1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <linux/slab.h>
34#include <linux/types.h>
35#include <linux/rbtree.h>
36#include <linux/bitops.h>
37#include <linux/export.h>
38
39#include "rds.h"
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*
 * Bumped by rds_cong_map_updated() every time any congestion map changes;
 * rds_cong_updated_since() compares a caller's cached value against it so
 * pollers can cheaply detect "something changed since I last looked".
 */
static atomic_t rds_cong_generation = ATOMIC_INIT(0);




/*
 * Sockets that enabled congestion monitoring (linked via rs_cong_list);
 * walked under the read side of rds_cong_monitor_lock in
 * rds_cong_map_updated(), modified under the write side in
 * rds_cong_add_socket()/rds_cong_remove_socket().
 */
static LIST_HEAD(rds_cong_monitor);
static DEFINE_RWLOCK(rds_cong_monitor_lock);
















/*
 * rds_cong_lock protects the rb-tree of congestion maps (rds_cong_tree,
 * keyed by peer address) and every map's m_conn_list of connections.
 */
static DEFINE_SPINLOCK(rds_cong_lock);
static struct rb_root rds_cong_tree = RB_ROOT;
103
104static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
105 struct rds_cong_map *insert)
106{
107 struct rb_node **p = &rds_cong_tree.rb_node;
108 struct rb_node *parent = NULL;
109 struct rds_cong_map *map;
110
111 while (*p) {
112 parent = *p;
113 map = rb_entry(parent, struct rds_cong_map, m_rb_node);
114
115 if (addr < map->m_addr)
116 p = &(*p)->rb_left;
117 else if (addr > map->m_addr)
118 p = &(*p)->rb_right;
119 else
120 return map;
121 }
122
123 if (insert) {
124 rb_link_node(&insert->m_rb_node, parent, p);
125 rb_insert_color(&insert->m_rb_node, &rds_cong_tree);
126 }
127 return NULL;
128}
129
130
131
132
133
134
/*
 * Return the congestion map for @addr, allocating one and inserting it
 * into the global tree if none exists yet.  Returns NULL on allocation
 * failure.
 *
 * The map and its bitmap pages are allocated before taking rds_cong_lock;
 * if another CPU raced us and inserted a map for the same address first,
 * we free our copy and return the winner's.  Maps are only ever freed in
 * rds_cong_exit(), so the returned pointer remains valid for the life of
 * the module.
 */
static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
{
	struct rds_cong_map *map;
	struct rds_cong_map *ret = NULL;
	unsigned long zp;
	unsigned long i;
	unsigned long flags;

	map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
	if (!map)
		return NULL;

	map->m_addr = addr;
	init_waitqueue_head(&map->m_waitq);
	INIT_LIST_HEAD(&map->m_conn_list);

	/* Allocate the zeroed bitmap pages up front, outside the lock. */
	for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
		zp = get_zeroed_page(GFP_KERNEL);
		if (zp == 0)
			goto out;
		map->m_page_addrs[i] = zp;
	}

	spin_lock_irqsave(&rds_cong_lock, flags);
	ret = rds_cong_tree_walk(addr, map);
	spin_unlock_irqrestore(&rds_cong_lock, flags);

	if (!ret) {
		/* Our map went into the tree; don't free it below. */
		ret = map;
		map = NULL;
	}

out:
	/* Reached with map != NULL when a page allocation failed or we
	 * lost the insertion race; release whatever pages we got. */
	if (map) {
		for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
			free_page(map->m_page_addrs[i]);
		kfree(map);
	}

	rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));

	return ret;
}
178
179
180
181
182
183void rds_cong_add_conn(struct rds_connection *conn)
184{
185 unsigned long flags;
186
187 rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong);
188 spin_lock_irqsave(&rds_cong_lock, flags);
189 list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list);
190 spin_unlock_irqrestore(&rds_cong_lock, flags);
191}
192
193void rds_cong_remove_conn(struct rds_connection *conn)
194{
195 unsigned long flags;
196
197 rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong);
198 spin_lock_irqsave(&rds_cong_lock, flags);
199 list_del_init(&conn->c_map_item);
200 spin_unlock_irqrestore(&rds_cong_lock, flags);
201}
202
203int rds_cong_get_maps(struct rds_connection *conn)
204{
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207
208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM;
210
211 return 0;
212}
213
/*
 * Ask every connection using @map to transmit a congestion update to its
 * peer.  Called after our local map changed (e.g. a port bit was cleared
 * in rds_cong_remove_socket()).
 */
void rds_cong_queue_updates(struct rds_cong_map *map)
{
	struct rds_connection *conn;
	unsigned long flags;

	spin_lock_irqsave(&rds_cong_lock, flags);

	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
		/* Bit 0 of c_map_queued marks "update pending"; only the
		 * first setter queues the work, so a map that changes many
		 * times before the send worker runs is sent just once. */
		if (!test_and_set_bit(0, &conn->c_map_queued)) {
			rds_stats_inc(s_cong_update_queued);
			/*
			 * NOTE(review): setting c_map_queued before
			 * queue_delayed_work() looks deliberate --
			 * presumably the send path clears the bit and a
			 * concurrent update then re-queues rather than
			 * being lost.  Confirm against the consumer of
			 * c_map_queued in the send worker.
			 */
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);
		}
	}

	spin_unlock_irqrestore(&rds_cong_lock, flags);
}
244
/*
 * Called when @map has been updated (e.g. a peer's congestion bitmap was
 * received).  Bumps the global generation counter, wakes senders sleeping
 * in rds_cong_wait() on this map and anyone on rds_poll_waitq, and
 * notifies monitoring sockets whose armed port mask intersects @portmask.
 */
void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
{
	rdsdebug("waking map %p for %pI4\n",
		 map, &map->m_addr);
	rds_stats_inc(s_cong_update_received);
	atomic_inc(&rds_cong_generation);
	if (waitqueue_active(&map->m_waitq))
		wake_up(&map->m_waitq);
	if (waitqueue_active(&rds_poll_waitq))
		wake_up_all(&rds_poll_waitq);

	if (portmask && !list_empty(&rds_cong_monitor)) {
		unsigned long flags;
		struct rds_sock *rs;

		read_lock_irqsave(&rds_cong_monitor_lock, flags);
		list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) {
			spin_lock(&rs->rs_lock);
			/* Latch the overlapping ports into rs_cong_notify
			 * and disarm them in rs_cong_mask so each update is
			 * delivered once per arming. */
			rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
			rs->rs_cong_mask &= ~portmask;
			spin_unlock(&rs->rs_lock);
			if (rs->rs_cong_notify)
				rds_wake_sk_sleep(rs);
		}
		read_unlock_irqrestore(&rds_cong_monitor_lock, flags);
	}
}
EXPORT_SYMBOL_GPL(rds_cong_map_updated);
273
274int rds_cong_updated_since(unsigned long *recent)
275{
276 unsigned long gen = atomic_read(&rds_cong_generation);
277
278 if (likely(*recent == gen))
279 return 0;
280 *recent = gen;
281 return 1;
282}
283
284
285
286
287
288
289
290
291void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
292{
293 unsigned long i;
294 unsigned long off;
295
296 rdsdebug("setting congestion for %pI4:%u in map %p\n",
297 &map->m_addr, ntohs(port), map);
298
299 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
300 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
301
302 __set_bit_le(off, (void *)map->m_page_addrs[i]);
303}
304
305void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
306{
307 unsigned long i;
308 unsigned long off;
309
310 rdsdebug("clearing congestion for %pI4:%u in map %p\n",
311 &map->m_addr, ntohs(port), map);
312
313 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
314 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
315
316 __clear_bit_le(off, (void *)map->m_page_addrs[i]);
317}
318
319static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
320{
321 unsigned long i;
322 unsigned long off;
323
324 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
325 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
326
327 return test_bit_le(off, (void *)map->m_page_addrs[i]);
328}
329
330void rds_cong_add_socket(struct rds_sock *rs)
331{
332 unsigned long flags;
333
334 write_lock_irqsave(&rds_cong_monitor_lock, flags);
335 if (list_empty(&rs->rs_cong_list))
336 list_add(&rs->rs_cong_list, &rds_cong_monitor);
337 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
338}
339
340void rds_cong_remove_socket(struct rds_sock *rs)
341{
342 unsigned long flags;
343 struct rds_cong_map *map;
344
345 write_lock_irqsave(&rds_cong_monitor_lock, flags);
346 list_del_init(&rs->rs_cong_list);
347 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
348
349
350 spin_lock_irqsave(&rds_cong_lock, flags);
351 map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
352 spin_unlock_irqrestore(&rds_cong_lock, flags);
353
354 if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
355 rds_cong_clear_bit(map, rs->rs_bound_port);
356 rds_cong_queue_updates(map);
357 }
358}
359
/*
 * Gate a send on @port's congestion state in @map.
 *
 * Returns 0 when the port is not congested.  In nonblocking mode,
 * returns -ENOBUFS while congested -- unless @rs uses congestion
 * monitoring, in which case the port is first armed in the socket's
 * monitor mask and the bit re-tested.  In blocking mode, sleeps
 * interruptibly on the map's waitqueue until the bit clears (woken by
 * rds_cong_map_updated()); may return -ERESTARTSYS on signal.
 */
int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock,
		  struct rds_sock *rs)
{
	if (!rds_cong_test_bit(map, port))
		return 0;
	if (nonblock) {
		if (rs && rs->rs_cong_monitor) {
			unsigned long flags;

			/* Arm this port in the monitor mask under rs_lock so
			 * rds_cong_map_updated() will notify the socket. */
			spin_lock_irqsave(&rs->rs_lock, flags);
			rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port));
			spin_unlock_irqrestore(&rs->rs_lock, flags);

			/*
			 * NOTE(review): re-testing after arming appears to
			 * close the race where the bit cleared between the
			 * first test and the arming -- an update that
			 * already ran would not notify us, so returning
			 * -ENOBUFS here could stall the sender.  Confirm
			 * against rds_cong_map_updated()'s mask handling.
			 */
			if (!rds_cong_test_bit(map, port))
				return 0;
		}
		rds_stats_inc(s_cong_send_error);
		return -ENOBUFS;
	}

	rds_stats_inc(s_cong_send_blocked);
	rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port));

	return wait_event_interruptible(map->m_waitq,
					!rds_cong_test_bit(map, port));
}
390
391void rds_cong_exit(void)
392{
393 struct rb_node *node;
394 struct rds_cong_map *map;
395 unsigned long i;
396
397 while ((node = rb_first(&rds_cong_tree))) {
398 map = rb_entry(node, struct rds_cong_map, m_rb_node);
399 rdsdebug("freeing map %p\n", map);
400 rb_erase(&map->m_rb_node, &rds_cong_tree);
401 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
402 free_page(map->m_page_addrs[i]);
403 kfree(map);
404 }
405}
406
407
408
409
410struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
411{
412 struct rds_cong_map *map = conn->c_lcong;
413 struct rds_message *rm;
414
415 rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
416 if (!IS_ERR(rm))
417 rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;
418
419 return rm;
420}
421