1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/workqueue.h>

#include "rds.h"
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*
 * Global congestion-update generation counter.  It is incremented by
 * rds_cong_map_updated() and sampled by rds_cong_updated_since(), which
 * compares it against a value cached by the caller.
 */
static atomic_t rds_cong_generation = ATOMIC_INIT(0);
82
83
84
85
/*
 * Sockets registered for congestion monitoring, linked through
 * rs_cong_list.  rds_cong_add_socket()/rds_cong_remove_socket() modify
 * the list under the write side of rds_cong_monitor_lock;
 * rds_cong_map_updated() walks it under the read side.
 */
static LIST_HEAD(rds_cong_monitor);
static DEFINE_RWLOCK(rds_cong_monitor_lock);
88
89
90
91
92
93
94
95
96
97
98
99
100
/*
 * rds_cong_tree is an rbtree of struct rds_cong_map keyed by m_addr.
 * rds_cong_lock is taken (with interrupts disabled) around lookups and
 * insertions in the tree and around changes to, or iteration over, a
 * map's m_conn_list.
 */
static DEFINE_SPINLOCK(rds_cong_lock);
static struct rb_root rds_cong_tree = RB_ROOT;
103
104static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
105 struct rds_cong_map *insert)
106{
107 struct rb_node **p = &rds_cong_tree.rb_node;
108 struct rb_node *parent = NULL;
109 struct rds_cong_map *map;
110
111 while (*p) {
112 parent = *p;
113 map = rb_entry(parent, struct rds_cong_map, m_rb_node);
114
115 if (addr < map->m_addr)
116 p = &(*p)->rb_left;
117 else if (addr > map->m_addr)
118 p = &(*p)->rb_right;
119 else
120 return map;
121 }
122
123 if (insert) {
124 rb_link_node(&insert->m_rb_node, parent, p);
125 rb_insert_color(&insert->m_rb_node, &rds_cong_tree);
126 }
127 return NULL;
128}
129
130
131
132
133
134
135static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
136{
137 struct rds_cong_map *map;
138 struct rds_cong_map *ret = NULL;
139 unsigned long zp;
140 unsigned long i;
141 unsigned long flags;
142
143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
144 if (!map)
145 return NULL;
146
147 map->m_addr = addr;
148 init_waitqueue_head(&map->m_waitq);
149 INIT_LIST_HEAD(&map->m_conn_list);
150
151 for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
152 zp = get_zeroed_page(GFP_KERNEL);
153 if (zp == 0)
154 goto out;
155 map->m_page_addrs[i] = zp;
156 }
157
158 spin_lock_irqsave(&rds_cong_lock, flags);
159 ret = rds_cong_tree_walk(addr, map);
160 spin_unlock_irqrestore(&rds_cong_lock, flags);
161
162 if (!ret) {
163 ret = map;
164 map = NULL;
165 }
166
167out:
168 if (map) {
169 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
170 free_page(map->m_page_addrs[i]);
171 kfree(map);
172 }
173
174 rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
175
176 return ret;
177}
178
179
180
181
182
183void rds_cong_add_conn(struct rds_connection *conn)
184{
185 unsigned long flags;
186
187 rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong);
188 spin_lock_irqsave(&rds_cong_lock, flags);
189 list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list);
190 spin_unlock_irqrestore(&rds_cong_lock, flags);
191}
192
193void rds_cong_remove_conn(struct rds_connection *conn)
194{
195 unsigned long flags;
196
197 rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong);
198 spin_lock_irqsave(&rds_cong_lock, flags);
199 list_del_init(&conn->c_map_item);
200 spin_unlock_irqrestore(&rds_cong_lock, flags);
201}
202
203int rds_cong_get_maps(struct rds_connection *conn)
204{
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207
208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM;
210
211 return 0;
212}
213
214void rds_cong_queue_updates(struct rds_cong_map *map)
215{
216 struct rds_connection *conn;
217 unsigned long flags;
218
219 spin_lock_irqsave(&rds_cong_lock, flags);
220
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) {
223 rds_stats_inc(s_cong_update_queued);
224 rds_send_xmit(conn);
225 }
226 }
227
228 spin_unlock_irqrestore(&rds_cong_lock, flags);
229}
230
231void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
232{
233 rdsdebug("waking map %p for %pI4\n",
234 map, &map->m_addr);
235 rds_stats_inc(s_cong_update_received);
236 atomic_inc(&rds_cong_generation);
237 if (waitqueue_active(&map->m_waitq))
238 wake_up(&map->m_waitq);
239 if (waitqueue_active(&rds_poll_waitq))
240 wake_up_all(&rds_poll_waitq);
241
242 if (portmask && !list_empty(&rds_cong_monitor)) {
243 unsigned long flags;
244 struct rds_sock *rs;
245
246 read_lock_irqsave(&rds_cong_monitor_lock, flags);
247 list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) {
248 spin_lock(&rs->rs_lock);
249 rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
250 rs->rs_cong_mask &= ~portmask;
251 spin_unlock(&rs->rs_lock);
252 if (rs->rs_cong_notify)
253 rds_wake_sk_sleep(rs);
254 }
255 read_unlock_irqrestore(&rds_cong_monitor_lock, flags);
256 }
257}
258EXPORT_SYMBOL_GPL(rds_cong_map_updated);
259
260int rds_cong_updated_since(unsigned long *recent)
261{
262 unsigned long gen = atomic_read(&rds_cong_generation);
263
264 if (likely(*recent == gen))
265 return 0;
266 *recent = gen;
267 return 1;
268}
269
270
271
272
273
274
275
276
277void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
278{
279 unsigned long i;
280 unsigned long off;
281
282 rdsdebug("setting congestion for %pI4:%u in map %p\n",
283 &map->m_addr, ntohs(port), map);
284
285 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
286 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
287
288 __set_bit_le(off, (void *)map->m_page_addrs[i]);
289}
290
291void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
292{
293 unsigned long i;
294 unsigned long off;
295
296 rdsdebug("clearing congestion for %pI4:%u in map %p\n",
297 &map->m_addr, ntohs(port), map);
298
299 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
300 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
301
302 __clear_bit_le(off, (void *)map->m_page_addrs[i]);
303}
304
305static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
306{
307 unsigned long i;
308 unsigned long off;
309
310 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
311 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
312
313 return test_bit_le(off, (void *)map->m_page_addrs[i]);
314}
315
316void rds_cong_add_socket(struct rds_sock *rs)
317{
318 unsigned long flags;
319
320 write_lock_irqsave(&rds_cong_monitor_lock, flags);
321 if (list_empty(&rs->rs_cong_list))
322 list_add(&rs->rs_cong_list, &rds_cong_monitor);
323 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
324}
325
326void rds_cong_remove_socket(struct rds_sock *rs)
327{
328 unsigned long flags;
329 struct rds_cong_map *map;
330
331 write_lock_irqsave(&rds_cong_monitor_lock, flags);
332 list_del_init(&rs->rs_cong_list);
333 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
334
335
336 spin_lock_irqsave(&rds_cong_lock, flags);
337 map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
338 spin_unlock_irqrestore(&rds_cong_lock, flags);
339
340 if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
341 rds_cong_clear_bit(map, rs->rs_bound_port);
342 rds_cong_queue_updates(map);
343 }
344}
345
346int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock,
347 struct rds_sock *rs)
348{
349 if (!rds_cong_test_bit(map, port))
350 return 0;
351 if (nonblock) {
352 if (rs && rs->rs_cong_monitor) {
353 unsigned long flags;
354
355
356
357 spin_lock_irqsave(&rs->rs_lock, flags);
358 rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port));
359 spin_unlock_irqrestore(&rs->rs_lock, flags);
360
361
362
363 if (!rds_cong_test_bit(map, port))
364 return 0;
365 }
366 rds_stats_inc(s_cong_send_error);
367 return -ENOBUFS;
368 }
369
370 rds_stats_inc(s_cong_send_blocked);
371 rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port));
372
373 return wait_event_interruptible(map->m_waitq,
374 !rds_cong_test_bit(map, port));
375}
376
377void rds_cong_exit(void)
378{
379 struct rb_node *node;
380 struct rds_cong_map *map;
381 unsigned long i;
382
383 while ((node = rb_first(&rds_cong_tree))) {
384 map = rb_entry(node, struct rds_cong_map, m_rb_node);
385 rdsdebug("freeing map %p\n", map);
386 rb_erase(&map->m_rb_node, &rds_cong_tree);
387 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
388 free_page(map->m_page_addrs[i]);
389 kfree(map);
390 }
391}
392
393
394
395
396struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
397{
398 struct rds_cong_map *map = conn->c_lcong;
399 struct rds_message *rm;
400
401 rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
402 if (!IS_ERR(rm))
403 rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;
404
405 return rm;
406}
407