linux/net/ipv4/tcp_memcontrol.c
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

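/*
 * cg_proto is embedded in struct tcp_memcontrol, so a plain
 * container_of() recovers the enclosing per-memcg TCP state.
 */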
static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
        return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}

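/*
 * Per-memcg counterpart of tcp_enter_memory_pressure(): raise the
 * pressure flag of the cgroup the socket is charged to rather than
 * the global one. Installed as cg_proto->enter_memory_pressure in
 * tcp_init_cgroup() below.
 */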
static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
        if (sk->sk_cgrp->memory_pressure)
                *sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);

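/*
 * Initialize TCP memory accounting for a new memcg: seed the
 * per-memcg pressure thresholds from the netns tcp_mem sysctl, chain
 * the page_counter to the parent's so charges propagate up the
 * hierarchy, and publish everything through cg_proto.
 */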
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
        /*
         * The root cgroup does not use page_counters; it relies on
         * the data already collected by the network subsystem.
         */
        struct tcp_memcontrol *tcp;
        struct mem_cgroup *parent = parent_mem_cgroup(memcg);
        struct net *net = current->nsproxy->net_ns;
        struct page_counter *counter_parent = NULL;
        struct cg_proto *cg_proto, *parent_cg;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;

        tcp = tcp_from_cgproto(cg_proto);

        tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
        tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
        tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
        tcp->tcp_memory_pressure = 0;

        parent_cg = tcp_prot.proto_cgroup(parent);
        if (parent_cg)
                counter_parent = parent_cg->memory_allocated;

        page_counter_init(&tcp->tcp_memory_allocated, counter_parent);
        percpu_counter_init(&tcp->tcp_sockets_allocated, 0, GFP_KERNEL);

        cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
        cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
        cg_proto->sysctl_mem = tcp->tcp_prot_mem;
        cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
        cg_proto->memcg = memcg;

        return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

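/*
 * Undo tcp_init_cgroup(). Only the percpu counter needs explicit
 * destruction; a page_counter holds no resources of its own.
 */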
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
        struct cg_proto *cg_proto;
        struct tcp_memcontrol *tcp;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return;

        tcp = tcp_from_cgproto(cg_proto);
        percpu_counter_destroy(&tcp->tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

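/*
 * Apply a new limit (in pages) to a memcg: update the page_counter,
 * clamp the three tcp_mem-style pressure thresholds to the new limit,
 * and enable the memcg socket-accounting static key on first
 * activation (see the comment below on ordering).
 */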
static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
{
        struct net *net = current->nsproxy->net_ns;
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;
        int i;
        int ret;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return -EINVAL;

        tcp = tcp_from_cgproto(cg_proto);

        ret = page_counter_limit(&tcp->tcp_memory_allocated, nr_pages);
        if (ret)
                return ret;

        for (i = 0; i < 3; i++)
                tcp->tcp_prot_mem[i] = min_t(long, nr_pages,
                                             net->ipv4.sysctl_tcp_mem[i]);

        if (nr_pages == PAGE_COUNTER_MAX) {
                clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        } else {
                /*
                 * The active bit needs to be written after the static_key
                 * update. This is what guarantees that the socket activation
                 * function is the last one to run. See sock_update_memcg() for
                 * details, and note that we don't mark any socket as belonging
                 * to this memcg until that flag is up.
                 *
                 * We need to do this, because static_keys will span multiple
                 * sites, but we can't control their order. If we mark a socket
                 * as accounted, but the accounting functions are not patched in
                 * yet, we'll lose accounting.
                 *
                 * We never race with the readers in sock_update_memcg(),
                 * because when this value changes, the code to process it is
                 * not patched in yet.
                 *
                 * The activated bit is used to guarantee that no two writers
                 * will do the update in the same memcg. Without that, we can't
                 * properly shut down the static key.
                 */
                if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
                        static_key_slow_inc(&memcg_socket_limit_enabled);
                set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        }

        return 0;
}

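/*
 * Indices stored in cftype->private to select which counter field a
 * control file exposes; they follow the RES_* naming convention of
 * memcontrol.c.
 */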
enum {
        RES_USAGE,
        RES_LIMIT,
        RES_MAX_USAGE,
        RES_FAILCNT,
};

static DEFINE_MUTEX(tcp_limit_mutex);

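/*
 * Write handler for kmem.tcp.limit_in_bytes: parse the new limit the
 * same way memcontrol.c does, then apply it under tcp_limit_mutex so
 * that concurrent writers cannot race the static-key bookkeeping in
 * tcp_update_limit().
 */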
static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
                            const char *buffer)
{
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
        unsigned long nr_pages;
        int ret = 0;

        switch (cft->private) {
        case RES_LIMIT:
                /* see memcontrol.c */
                ret = page_counter_memparse(buffer, &nr_pages);
                if (ret)
                        break;
                mutex_lock(&tcp_limit_mutex);
                ret = tcp_update_limit(memcg, nr_pages);
                mutex_unlock(&tcp_limit_mutex);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}

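/*
 * Read handler shared by all four control files. Limit, usage and
 * watermark are kept in pages and scaled to bytes here; failcnt is a
 * bare event count. A memcg without a cg_proto reports the global
 * usage, or a neutral default for the other files.
 */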
static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
        struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
        u64 val;

        switch (cft->private) {
        case RES_LIMIT:
                if (!cg_proto)
                        return PAGE_COUNTER_MAX;
                val = cg_proto->memory_allocated->limit;
                val *= PAGE_SIZE;
                break;
        case RES_USAGE:
                if (!cg_proto)
                        val = atomic_long_read(&tcp_memory_allocated);
                else
                        val = page_counter_read(cg_proto->memory_allocated);
                val *= PAGE_SIZE;
                break;
        case RES_FAILCNT:
                if (!cg_proto)
                        return 0;
                val = cg_proto->memory_allocated->failcnt;
                break;
        case RES_MAX_USAGE:
                if (!cg_proto)
                        return 0;
                val = cg_proto->memory_allocated->watermark;
                val *= PAGE_SIZE;
                break;
        default:
                BUG();
        }
        return val;
}

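/*
 * Trigger handler for the failcnt and max_usage files: writing to
 * either file resets the failure counter or the high-water mark,
 * respectively.
 */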
static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
{
        struct mem_cgroup *memcg;
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        memcg = mem_cgroup_from_cont(cont);
        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;
        tcp = tcp_from_cgproto(cg_proto);

        switch (event) {
        case RES_MAX_USAGE:
                page_counter_reset_watermark(&tcp->tcp_memory_allocated);
                break;
        case RES_FAILCNT:
                tcp->tcp_memory_allocated.failcnt = 0;
                break;
        }

        return 0;
}

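/*
 * Mirror one updated tcp_mem[] entry into the memcg's private copy.
 * Presumably called from the tcp_mem sysctl write path so that
 * per-memcg thresholds keep tracking the netns-wide ones.
 */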
void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
{
        struct tcp_memcontrol *tcp;
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return;

        tcp = tcp_from_cgproto(cg_proto);

        tcp->tcp_prot_mem[idx] = val;
}

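/*
 * The kmem.tcp.* control files exported under the memory controller.
 * Example usage from userspace (paths assume the v1 memory controller
 * is mounted at /sys/fs/cgroup/memory and a group "grp" exists):
 *
 *   echo 16M > /sys/fs/cgroup/memory/grp/memory.kmem.tcp.limit_in_bytes
 *   cat /sys/fs/cgroup/memory/grp/memory.kmem.tcp.usage_in_bytes
 */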
static struct cftype tcp_files[] = {
        {
                .name = "kmem.tcp.limit_in_bytes",
                .write_string = tcp_cgroup_write,
                .read_u64 = tcp_cgroup_read,
                .private = RES_LIMIT,
        },
        {
                .name = "kmem.tcp.usage_in_bytes",
                .read_u64 = tcp_cgroup_read,
                .private = RES_USAGE,
        },
        {
                .name = "kmem.tcp.failcnt",
                .private = RES_FAILCNT,
                .trigger = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        {
                .name = "kmem.tcp.max_usage_in_bytes",
                .private = RES_MAX_USAGE,
                .trigger = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        { }     /* terminate */
};

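/*
 * Register the kmem.tcp.* files with the memory cgroup subsystem at
 * boot; registration failure is not fatal, hence only a WARN_ON().
 */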
static int __init tcp_memcontrol_init(void)
{
        WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
        return 0;
}
__initcall(tcp_memcontrol_init);