1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/bitops.h>
15#include <linux/slab.h>
16#include <linux/seq_file.h>
17#include <linux/cgroup.h>
18#include <linux/parser.h>
19#include <linux/cgroup_rdma.h>
20
/*
 * Text that parse_resource() accepts for, and print_rpool_values() prints
 * for, a value equal to S32_MAX.
 */
#define RDMACG_MAX_STR "max"
22
23
24
25
26
/*
 * rdmacg_mutex is taken in this file around every walk or modification of
 * rdmacg_devices and of the resource pool lists attached to cgroups and
 * devices.
 */
static DEFINE_MUTEX(rdmacg_mutex);
/* Devices added by rdmacg_register_device(), linked through dev_node. */
static LIST_HEAD(rdmacg_devices);
29
/*
 * Stored in cftype->private of the interface files; print_rpool_values()
 * uses it to decide whether to print limits or current usage.
 */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX,	/* print resources[i].max */
	RDMACG_RESOURCE_TYPE_STAT,	/* print resources[i].usage */
};
34
35
36
37
38
39
40static char const *rdmacg_resource_names[] = {
41 [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle",
42 [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object",
43};
44
45
/* Limit and current charge count for one resource type within a pool. */
struct rdmacg_resource {
	int max;	/* limit; S32_MAX is shown and parsed as "max" */
	int usage;	/* incremented by rdmacg_try_charge(), decremented on uncharge */
};
50
51
52
53
54
55
56
/*
 * Per (cgroup, device) accounting state. A pool is created on demand by
 * get_cg_rpool_locked() and freed once nothing is charged and every limit
 * is back at S32_MAX.
 */
struct rdmacg_resource_pool {
	struct rdmacg_device	*device;	/* device this pool accounts for */
	struct rdmacg_resource	resources[RDMACG_RESOURCE_MAX];

	struct list_head	cg_node;	/* entry in rdma_cgroup->rpools */
	struct list_head	dev_node;	/* entry in rdmacg_device->rpools */

	/* total number of outstanding charges across all resources */
	u64			usage_sum;
	/* number of resources whose max is currently S32_MAX */
	int			num_max_cnt;
};
69
70static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
71{
72 return container_of(css, struct rdma_cgroup, css);
73}
74
75static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
76{
77 return css_rdmacg(cg->css.parent);
78}
79
80static inline struct rdma_cgroup *get_current_rdmacg(void)
81{
82 return css_rdmacg(task_get_css(current, rdma_cgrp_id));
83}
84
85static void set_resource_limit(struct rdmacg_resource_pool *rpool,
86 int index, int new_max)
87{
88 if (new_max == S32_MAX) {
89 if (rpool->resources[index].max != S32_MAX)
90 rpool->num_max_cnt++;
91 } else {
92 if (rpool->resources[index].max == S32_MAX)
93 rpool->num_max_cnt--;
94 }
95 rpool->resources[index].max = new_max;
96}
97
98static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
99{
100 int i;
101
102 for (i = 0; i < RDMACG_RESOURCE_MAX; i++)
103 set_resource_limit(rpool, i, S32_MAX);
104}
105
106static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
107{
108 lockdep_assert_held(&rdmacg_mutex);
109
110 list_del(&rpool->cg_node);
111 list_del(&rpool->dev_node);
112 kfree(rpool);
113}
114
115static struct rdmacg_resource_pool *
116find_cg_rpool_locked(struct rdma_cgroup *cg,
117 struct rdmacg_device *device)
118
119{
120 struct rdmacg_resource_pool *pool;
121
122 lockdep_assert_held(&rdmacg_mutex);
123
124 list_for_each_entry(pool, &cg->rpools, cg_node)
125 if (pool->device == device)
126 return pool;
127
128 return NULL;
129}
130
131static struct rdmacg_resource_pool *
132get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
133{
134 struct rdmacg_resource_pool *rpool;
135
136 rpool = find_cg_rpool_locked(cg, device);
137 if (rpool)
138 return rpool;
139
140 rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
141 if (!rpool)
142 return ERR_PTR(-ENOMEM);
143
144 rpool->device = device;
145 set_all_resource_max_limit(rpool);
146
147 INIT_LIST_HEAD(&rpool->cg_node);
148 INIT_LIST_HEAD(&rpool->dev_node);
149 list_add_tail(&rpool->cg_node, &cg->rpools);
150 list_add_tail(&rpool->dev_node, &device->rpools);
151 return rpool;
152}
153
154
155
156
157
158
159
160
161
162
163
164static void
165uncharge_cg_locked(struct rdma_cgroup *cg,
166 struct rdmacg_device *device,
167 enum rdmacg_resource_type index)
168{
169 struct rdmacg_resource_pool *rpool;
170
171 rpool = find_cg_rpool_locked(cg, device);
172
173
174
175
176
177
178 if (unlikely(!rpool)) {
179 pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device);
180 return;
181 }
182
183 rpool->resources[index].usage--;
184
185
186
187
188
189 WARN_ON_ONCE(rpool->resources[index].usage < 0);
190 rpool->usage_sum--;
191 if (rpool->usage_sum == 0 &&
192 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
193
194
195
196
197 free_cg_rpool_locked(rpool);
198 }
199}
200
201
202
203
204
205
206
207
208static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
209 struct rdmacg_device *device,
210 struct rdma_cgroup *stop_cg,
211 enum rdmacg_resource_type index)
212{
213 struct rdma_cgroup *p;
214
215 mutex_lock(&rdmacg_mutex);
216
217 for (p = cg; p != stop_cg; p = parent_rdmacg(p))
218 uncharge_cg_locked(p, device, index);
219
220 mutex_unlock(&rdmacg_mutex);
221
222 css_put(&cg->css);
223}
224
225
226
227
228
229
230void rdmacg_uncharge(struct rdma_cgroup *cg,
231 struct rdmacg_device *device,
232 enum rdmacg_resource_type index)
233{
234 if (index >= RDMACG_RESOURCE_MAX)
235 return;
236
237 rdmacg_uncharge_hierarchy(cg, device, NULL, index);
238}
239EXPORT_SYMBOL(rdmacg_uncharge);
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
263 struct rdmacg_device *device,
264 enum rdmacg_resource_type index)
265{
266 struct rdma_cgroup *cg, *p;
267 struct rdmacg_resource_pool *rpool;
268 s64 new;
269 int ret = 0;
270
271 if (index >= RDMACG_RESOURCE_MAX)
272 return -EINVAL;
273
274
275
276
277
278 cg = get_current_rdmacg();
279
280 mutex_lock(&rdmacg_mutex);
281 for (p = cg; p; p = parent_rdmacg(p)) {
282 rpool = get_cg_rpool_locked(p, device);
283 if (IS_ERR(rpool)) {
284 ret = PTR_ERR(rpool);
285 goto err;
286 } else {
287 new = rpool->resources[index].usage + 1;
288 if (new > rpool->resources[index].max) {
289 ret = -EAGAIN;
290 goto err;
291 } else {
292 rpool->resources[index].usage = new;
293 rpool->usage_sum++;
294 }
295 }
296 }
297 mutex_unlock(&rdmacg_mutex);
298
299 *rdmacg = cg;
300 return 0;
301
302err:
303 mutex_unlock(&rdmacg_mutex);
304 rdmacg_uncharge_hierarchy(cg, device, p, index);
305 return ret;
306}
307EXPORT_SYMBOL(rdmacg_try_charge);
308
309
310
311
312
313
314
315
316
317
318
319int rdmacg_register_device(struct rdmacg_device *device)
320{
321 INIT_LIST_HEAD(&device->dev_node);
322 INIT_LIST_HEAD(&device->rpools);
323
324 mutex_lock(&rdmacg_mutex);
325 list_add_tail(&device->dev_node, &rdmacg_devices);
326 mutex_unlock(&rdmacg_mutex);
327 return 0;
328}
329EXPORT_SYMBOL(rdmacg_register_device);
330
331
332
333
334
335
336
337
338
339
340void rdmacg_unregister_device(struct rdmacg_device *device)
341{
342 struct rdmacg_resource_pool *rpool, *tmp;
343
344
345
346
347
348 mutex_lock(&rdmacg_mutex);
349 list_del_init(&device->dev_node);
350
351
352
353
354
355 list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
356 free_cg_rpool_locked(rpool);
357
358 mutex_unlock(&rdmacg_mutex);
359}
360EXPORT_SYMBOL(rdmacg_unregister_device);
361
362static int parse_resource(char *c, int *intval)
363{
364 substring_t argstr;
365 char *name, *value = c;
366 size_t len;
367 int ret, i;
368
369 name = strsep(&value, "=");
370 if (!name || !value)
371 return -EINVAL;
372
373 i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
374 if (i < 0)
375 return i;
376
377 len = strlen(value);
378
379 argstr.from = value;
380 argstr.to = value + len;
381
382 ret = match_int(&argstr, intval);
383 if (ret >= 0) {
384 if (*intval < 0)
385 return -EINVAL;
386 return i;
387 }
388 if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
389 *intval = S32_MAX;
390 return i;
391 }
392 return -EINVAL;
393}
394
395static int rdmacg_parse_limits(char *options,
396 int *new_limits, unsigned long *enables)
397{
398 char *c;
399 int err = -EINVAL;
400
401
402 while ((c = strsep(&options, " ")) != NULL) {
403 int index, intval;
404
405 index = parse_resource(c, &intval);
406 if (index < 0)
407 goto err;
408
409 new_limits[index] = intval;
410 *enables |= BIT(index);
411 }
412 return 0;
413
414err:
415 return err;
416}
417
418static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
419{
420 struct rdmacg_device *device;
421
422 lockdep_assert_held(&rdmacg_mutex);
423
424 list_for_each_entry(device, &rdmacg_devices, dev_node)
425 if (!strcmp(name, device->name))
426 return device;
427
428 return NULL;
429}
430
431static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
432 char *buf, size_t nbytes, loff_t off)
433{
434 struct rdma_cgroup *cg = css_rdmacg(of_css(of));
435 const char *dev_name;
436 struct rdmacg_resource_pool *rpool;
437 struct rdmacg_device *device;
438 char *options = strstrip(buf);
439 int *new_limits;
440 unsigned long enables = 0;
441 int i = 0, ret = 0;
442
443
444 dev_name = strsep(&options, " ");
445 if (!dev_name) {
446 ret = -EINVAL;
447 goto err;
448 }
449
450 new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
451 if (!new_limits) {
452 ret = -ENOMEM;
453 goto err;
454 }
455
456 ret = rdmacg_parse_limits(options, new_limits, &enables);
457 if (ret)
458 goto parse_err;
459
460
461 mutex_lock(&rdmacg_mutex);
462
463 device = rdmacg_get_device_locked(dev_name);
464 if (!device) {
465 ret = -ENODEV;
466 goto dev_err;
467 }
468
469 rpool = get_cg_rpool_locked(cg, device);
470 if (IS_ERR(rpool)) {
471 ret = PTR_ERR(rpool);
472 goto dev_err;
473 }
474
475
476 for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
477 set_resource_limit(rpool, i, new_limits[i]);
478
479 if (rpool->usage_sum == 0 &&
480 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
481
482
483
484
485 free_cg_rpool_locked(rpool);
486 }
487
488dev_err:
489 mutex_unlock(&rdmacg_mutex);
490
491parse_err:
492 kfree(new_limits);
493
494err:
495 return ret ?: nbytes;
496}
497
498static void print_rpool_values(struct seq_file *sf,
499 struct rdmacg_resource_pool *rpool)
500{
501 enum rdmacg_file_type sf_type;
502 int i;
503 u32 value;
504
505 sf_type = seq_cft(sf)->private;
506
507 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
508 seq_puts(sf, rdmacg_resource_names[i]);
509 seq_putc(sf, '=');
510 if (sf_type == RDMACG_RESOURCE_TYPE_MAX) {
511 if (rpool)
512 value = rpool->resources[i].max;
513 else
514 value = S32_MAX;
515 } else {
516 if (rpool)
517 value = rpool->resources[i].usage;
518 else
519 value = 0;
520 }
521
522 if (value == S32_MAX)
523 seq_puts(sf, RDMACG_MAX_STR);
524 else
525 seq_printf(sf, "%d", value);
526 seq_putc(sf, ' ');
527 }
528}
529
530static int rdmacg_resource_read(struct seq_file *sf, void *v)
531{
532 struct rdmacg_device *device;
533 struct rdmacg_resource_pool *rpool;
534 struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
535
536 mutex_lock(&rdmacg_mutex);
537
538 list_for_each_entry(device, &rdmacg_devices, dev_node) {
539 seq_printf(sf, "%s ", device->name);
540
541 rpool = find_cg_rpool_locked(cg, device);
542 print_rpool_values(sf, rpool);
543
544 seq_putc(sf, '\n');
545 }
546
547 mutex_unlock(&rdmacg_mutex);
548 return 0;
549}
550
/*
 * Interface files of the controller. Both files share the same show
 * handler; .private tells print_rpool_values() which value to print.
 * NOTE(review): CFTYPE_NOT_ON_ROOT presumably keeps the files out of the
 * root cgroup - confirm against the cgroup core.
 */
static struct cftype rdmacg_files[] = {
	{
		/* limits; writable through rdmacg_resource_set_max() */
		.name		= "max",
		.write		= rdmacg_resource_set_max,
		.seq_show	= rdmacg_resource_read,
		.private	= RDMACG_RESOURCE_TYPE_MAX,
		.flags		= CFTYPE_NOT_ON_ROOT,
	},
	{
		/* current usage; no write handler */
		.name		= "current",
		.seq_show	= rdmacg_resource_read,
		.private	= RDMACG_RESOURCE_TYPE_STAT,
		.flags		= CFTYPE_NOT_ON_ROOT,
	},
	{ }	/* terminate */
};
567
568static struct cgroup_subsys_state *
569rdmacg_css_alloc(struct cgroup_subsys_state *parent)
570{
571 struct rdma_cgroup *cg;
572
573 cg = kzalloc(sizeof(*cg), GFP_KERNEL);
574 if (!cg)
575 return ERR_PTR(-ENOMEM);
576
577 INIT_LIST_HEAD(&cg->rpools);
578 return &cg->css;
579}
580
/* css_free callback: release the rdma_cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
587
588
589
590
591
592
593
594
595
596
597static void rdmacg_css_offline(struct cgroup_subsys_state *css)
598{
599 struct rdma_cgroup *cg = css_rdmacg(css);
600 struct rdmacg_resource_pool *rpool;
601
602 mutex_lock(&rdmacg_mutex);
603
604 list_for_each_entry(rpool, &cg->rpools, cg_node)
605 set_all_resource_max_limit(rpool);
606
607 mutex_unlock(&rdmacg_mutex);
608}
609
/*
 * Subsystem descriptor wiring the callbacks above into the cgroup core.
 * The same file table is used for both cftype sets.
 * NOTE(review): legacy_cftypes/dfl_cftypes presumably correspond to the
 * legacy and default hierarchies - confirm against struct cgroup_subsys.
 */
struct cgroup_subsys rdma_cgrp_subsys = {
	.css_alloc	= rdmacg_css_alloc,
	.css_free	= rdmacg_css_free,
	.css_offline	= rdmacg_css_offline,
	.legacy_cftypes	= rdmacg_files,
	.dfl_cftypes	= rdmacg_files,
};
617