1
2
3
4
5
6
7
8
9
10
11#include <linux/bitops.h>
12#include <linux/slab.h>
13#include <linux/seq_file.h>
14#include <linux/cgroup.h>
15#include <linux/parser.h>
16#include <linux/cgroup_rdma.h>
17
18#define RDMACG_MAX_STR "max"
19
20
21
22
23
/* Every registered rdmacg device, linked through rdmacg_device.dev_node. */
static LIST_HEAD(rdmacg_devices);

/*
 * Taken in this file around all manipulation of the device list and of the
 * per-cgroup, per-device resource pools.
 */
static DEFINE_MUTEX(rdmacg_mutex);
26
/*
 * Identifies which value an interface file reports; the value is stored in
 * the file's cftype.private and read back in print_rpool_values().
 */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX = 0,	/* report the configured limits */
	RDMACG_RESOURCE_TYPE_STAT,	/* report the current usage */
};
31
32
33
34
35
36
37static char const *rdmacg_resource_names[] = {
38 [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle",
39 [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object",
40};
41
42
/* Limit and current charge count for one resource type inside a pool. */
struct rdmacg_resource {
	int max;	/* configured limit; S32_MAX is shown to user space as "max" */
	int usage;	/* number of charges currently held against this resource */
};
47
48
49
50
51
52
53
54struct rdmacg_resource_pool {
55 struct rdmacg_device *device;
56 struct rdmacg_resource resources[RDMACG_RESOURCE_MAX];
57
58 struct list_head cg_node;
59 struct list_head dev_node;
60
61
62 u64 usage_sum;
63
64 int num_max_cnt;
65};
66
67static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
68{
69 return container_of(css, struct rdma_cgroup, css);
70}
71
72static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
73{
74 return css_rdmacg(cg->css.parent);
75}
76
77static inline struct rdma_cgroup *get_current_rdmacg(void)
78{
79 return css_rdmacg(task_get_css(current, rdma_cgrp_id));
80}
81
82static void set_resource_limit(struct rdmacg_resource_pool *rpool,
83 int index, int new_max)
84{
85 if (new_max == S32_MAX) {
86 if (rpool->resources[index].max != S32_MAX)
87 rpool->num_max_cnt++;
88 } else {
89 if (rpool->resources[index].max == S32_MAX)
90 rpool->num_max_cnt--;
91 }
92 rpool->resources[index].max = new_max;
93}
94
95static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
96{
97 int i;
98
99 for (i = 0; i < RDMACG_RESOURCE_MAX; i++)
100 set_resource_limit(rpool, i, S32_MAX);
101}
102
103static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
104{
105 lockdep_assert_held(&rdmacg_mutex);
106
107 list_del(&rpool->cg_node);
108 list_del(&rpool->dev_node);
109 kfree(rpool);
110}
111
112static struct rdmacg_resource_pool *
113find_cg_rpool_locked(struct rdma_cgroup *cg,
114 struct rdmacg_device *device)
115
116{
117 struct rdmacg_resource_pool *pool;
118
119 lockdep_assert_held(&rdmacg_mutex);
120
121 list_for_each_entry(pool, &cg->rpools, cg_node)
122 if (pool->device == device)
123 return pool;
124
125 return NULL;
126}
127
128static struct rdmacg_resource_pool *
129get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
130{
131 struct rdmacg_resource_pool *rpool;
132
133 rpool = find_cg_rpool_locked(cg, device);
134 if (rpool)
135 return rpool;
136
137 rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
138 if (!rpool)
139 return ERR_PTR(-ENOMEM);
140
141 rpool->device = device;
142 set_all_resource_max_limit(rpool);
143
144 INIT_LIST_HEAD(&rpool->cg_node);
145 INIT_LIST_HEAD(&rpool->dev_node);
146 list_add_tail(&rpool->cg_node, &cg->rpools);
147 list_add_tail(&rpool->dev_node, &device->rpools);
148 return rpool;
149}
150
151
152
153
154
155
156
157
158
159
160
161static void
162uncharge_cg_locked(struct rdma_cgroup *cg,
163 struct rdmacg_device *device,
164 enum rdmacg_resource_type index)
165{
166 struct rdmacg_resource_pool *rpool;
167
168 rpool = find_cg_rpool_locked(cg, device);
169
170
171
172
173
174
175 if (unlikely(!rpool)) {
176 pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device);
177 return;
178 }
179
180 rpool->resources[index].usage--;
181
182
183
184
185
186 WARN_ON_ONCE(rpool->resources[index].usage < 0);
187 rpool->usage_sum--;
188 if (rpool->usage_sum == 0 &&
189 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
190
191
192
193
194 free_cg_rpool_locked(rpool);
195 }
196}
197
198
199
200
201
202
203
204
205static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
206 struct rdmacg_device *device,
207 struct rdma_cgroup *stop_cg,
208 enum rdmacg_resource_type index)
209{
210 struct rdma_cgroup *p;
211
212 mutex_lock(&rdmacg_mutex);
213
214 for (p = cg; p != stop_cg; p = parent_rdmacg(p))
215 uncharge_cg_locked(p, device, index);
216
217 mutex_unlock(&rdmacg_mutex);
218
219 css_put(&cg->css);
220}
221
222
223
224
225
226
227void rdmacg_uncharge(struct rdma_cgroup *cg,
228 struct rdmacg_device *device,
229 enum rdmacg_resource_type index)
230{
231 if (index >= RDMACG_RESOURCE_MAX)
232 return;
233
234 rdmacg_uncharge_hierarchy(cg, device, NULL, index);
235}
236EXPORT_SYMBOL(rdmacg_uncharge);
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
260 struct rdmacg_device *device,
261 enum rdmacg_resource_type index)
262{
263 struct rdma_cgroup *cg, *p;
264 struct rdmacg_resource_pool *rpool;
265 s64 new;
266 int ret = 0;
267
268 if (index >= RDMACG_RESOURCE_MAX)
269 return -EINVAL;
270
271
272
273
274
275 cg = get_current_rdmacg();
276
277 mutex_lock(&rdmacg_mutex);
278 for (p = cg; p; p = parent_rdmacg(p)) {
279 rpool = get_cg_rpool_locked(p, device);
280 if (IS_ERR(rpool)) {
281 ret = PTR_ERR(rpool);
282 goto err;
283 } else {
284 new = rpool->resources[index].usage + 1;
285 if (new > rpool->resources[index].max) {
286 ret = -EAGAIN;
287 goto err;
288 } else {
289 rpool->resources[index].usage = new;
290 rpool->usage_sum++;
291 }
292 }
293 }
294 mutex_unlock(&rdmacg_mutex);
295
296 *rdmacg = cg;
297 return 0;
298
299err:
300 mutex_unlock(&rdmacg_mutex);
301 rdmacg_uncharge_hierarchy(cg, device, p, index);
302 return ret;
303}
304EXPORT_SYMBOL(rdmacg_try_charge);
305
306
307
308
309
310
311
312
313
314void rdmacg_register_device(struct rdmacg_device *device)
315{
316 INIT_LIST_HEAD(&device->dev_node);
317 INIT_LIST_HEAD(&device->rpools);
318
319 mutex_lock(&rdmacg_mutex);
320 list_add_tail(&device->dev_node, &rdmacg_devices);
321 mutex_unlock(&rdmacg_mutex);
322}
323EXPORT_SYMBOL(rdmacg_register_device);
324
325
326
327
328
329
330
331
332
333
334void rdmacg_unregister_device(struct rdmacg_device *device)
335{
336 struct rdmacg_resource_pool *rpool, *tmp;
337
338
339
340
341
342 mutex_lock(&rdmacg_mutex);
343 list_del_init(&device->dev_node);
344
345
346
347
348
349 list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
350 free_cg_rpool_locked(rpool);
351
352 mutex_unlock(&rdmacg_mutex);
353}
354EXPORT_SYMBOL(rdmacg_unregister_device);
355
356static int parse_resource(char *c, int *intval)
357{
358 substring_t argstr;
359 char *name, *value = c;
360 size_t len;
361 int ret, i;
362
363 name = strsep(&value, "=");
364 if (!name || !value)
365 return -EINVAL;
366
367 i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
368 if (i < 0)
369 return i;
370
371 len = strlen(value);
372
373 argstr.from = value;
374 argstr.to = value + len;
375
376 ret = match_int(&argstr, intval);
377 if (ret >= 0) {
378 if (*intval < 0)
379 return -EINVAL;
380 return i;
381 }
382 if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
383 *intval = S32_MAX;
384 return i;
385 }
386 return -EINVAL;
387}
388
389static int rdmacg_parse_limits(char *options,
390 int *new_limits, unsigned long *enables)
391{
392 char *c;
393 int err = -EINVAL;
394
395
396 while ((c = strsep(&options, " ")) != NULL) {
397 int index, intval;
398
399 index = parse_resource(c, &intval);
400 if (index < 0)
401 goto err;
402
403 new_limits[index] = intval;
404 *enables |= BIT(index);
405 }
406 return 0;
407
408err:
409 return err;
410}
411
412static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
413{
414 struct rdmacg_device *device;
415
416 lockdep_assert_held(&rdmacg_mutex);
417
418 list_for_each_entry(device, &rdmacg_devices, dev_node)
419 if (!strcmp(name, device->name))
420 return device;
421
422 return NULL;
423}
424
425static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
426 char *buf, size_t nbytes, loff_t off)
427{
428 struct rdma_cgroup *cg = css_rdmacg(of_css(of));
429 const char *dev_name;
430 struct rdmacg_resource_pool *rpool;
431 struct rdmacg_device *device;
432 char *options = strstrip(buf);
433 int *new_limits;
434 unsigned long enables = 0;
435 int i = 0, ret = 0;
436
437
438 dev_name = strsep(&options, " ");
439 if (!dev_name) {
440 ret = -EINVAL;
441 goto err;
442 }
443
444 new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
445 if (!new_limits) {
446 ret = -ENOMEM;
447 goto err;
448 }
449
450 ret = rdmacg_parse_limits(options, new_limits, &enables);
451 if (ret)
452 goto parse_err;
453
454
455 mutex_lock(&rdmacg_mutex);
456
457 device = rdmacg_get_device_locked(dev_name);
458 if (!device) {
459 ret = -ENODEV;
460 goto dev_err;
461 }
462
463 rpool = get_cg_rpool_locked(cg, device);
464 if (IS_ERR(rpool)) {
465 ret = PTR_ERR(rpool);
466 goto dev_err;
467 }
468
469
470 for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
471 set_resource_limit(rpool, i, new_limits[i]);
472
473 if (rpool->usage_sum == 0 &&
474 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
475
476
477
478
479 free_cg_rpool_locked(rpool);
480 }
481
482dev_err:
483 mutex_unlock(&rdmacg_mutex);
484
485parse_err:
486 kfree(new_limits);
487
488err:
489 return ret ?: nbytes;
490}
491
492static void print_rpool_values(struct seq_file *sf,
493 struct rdmacg_resource_pool *rpool)
494{
495 enum rdmacg_file_type sf_type;
496 int i;
497 u32 value;
498
499 sf_type = seq_cft(sf)->private;
500
501 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
502 seq_puts(sf, rdmacg_resource_names[i]);
503 seq_putc(sf, '=');
504 if (sf_type == RDMACG_RESOURCE_TYPE_MAX) {
505 if (rpool)
506 value = rpool->resources[i].max;
507 else
508 value = S32_MAX;
509 } else {
510 if (rpool)
511 value = rpool->resources[i].usage;
512 else
513 value = 0;
514 }
515
516 if (value == S32_MAX)
517 seq_puts(sf, RDMACG_MAX_STR);
518 else
519 seq_printf(sf, "%d", value);
520 seq_putc(sf, ' ');
521 }
522}
523
524static int rdmacg_resource_read(struct seq_file *sf, void *v)
525{
526 struct rdmacg_device *device;
527 struct rdmacg_resource_pool *rpool;
528 struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
529
530 mutex_lock(&rdmacg_mutex);
531
532 list_for_each_entry(device, &rdmacg_devices, dev_node) {
533 seq_printf(sf, "%s ", device->name);
534
535 rpool = find_cg_rpool_locked(cg, device);
536 print_rpool_values(sf, rpool);
537
538 seq_putc(sf, '\n');
539 }
540
541 mutex_unlock(&rdmacg_mutex);
542 return 0;
543}
544
545static struct cftype rdmacg_files[] = {
546 {
547 .name = "max",
548 .write = rdmacg_resource_set_max,
549 .seq_show = rdmacg_resource_read,
550 .private = RDMACG_RESOURCE_TYPE_MAX,
551 .flags = CFTYPE_NOT_ON_ROOT,
552 },
553 {
554 .name = "current",
555 .seq_show = rdmacg_resource_read,
556 .private = RDMACG_RESOURCE_TYPE_STAT,
557 .flags = CFTYPE_NOT_ON_ROOT,
558 },
559 { }
560};
561
562static struct cgroup_subsys_state *
563rdmacg_css_alloc(struct cgroup_subsys_state *parent)
564{
565 struct rdma_cgroup *cg;
566
567 cg = kzalloc(sizeof(*cg), GFP_KERNEL);
568 if (!cg)
569 return ERR_PTR(-ENOMEM);
570
571 INIT_LIST_HEAD(&cg->rpools);
572 return &cg->css;
573}
574
/* css_free callback: release the rdma_cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
581
582
583
584
585
586
587
588
589
590
591static void rdmacg_css_offline(struct cgroup_subsys_state *css)
592{
593 struct rdma_cgroup *cg = css_rdmacg(css);
594 struct rdmacg_resource_pool *rpool;
595
596 mutex_lock(&rdmacg_mutex);
597
598 list_for_each_entry(rpool, &cg->rpools, cg_node)
599 set_all_resource_max_limit(rpool);
600
601 mutex_unlock(&rdmacg_mutex);
602}
603
604struct cgroup_subsys rdma_cgrp_subsys = {
605 .css_alloc = rdmacg_css_alloc,
606 .css_free = rdmacg_css_free,
607 .css_offline = rdmacg_css_offline,
608 .legacy_cftypes = rdmacg_files,
609 .dfl_cftypes = rdmacg_files,
610};
611