linux/net/netfilter/ipvs/ip_vs_sh.c
<<
>>
Prefs
   1/*
   2 * IPVS:        Source Hashing scheduling module
   3 *
   4 * Authors:     Wensong Zhang <wensong@gnuchina.org>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Changes:
  12 *
  13 */
  14
/*
 * The sh algorithm selects a server by the hash key of the source IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[src_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a hash table (256 buckets by default) that
 * maps the hash index derived from the packet source IP address to the
 * current server array. If the sh scheduler is used in a cache cluster,
 * it is good to combine it with the cache_bypass feature. When the
 * statically assigned server is dead or overloaded, the load balancer
 * can bypass the cache server and send requests to the original server
 * directly.
 *
 */
  34
  35#define KMSG_COMPONENT "IPVS"
  36#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  37
  38#include <linux/ip.h>
  39#include <linux/module.h>
  40#include <linux/kernel.h>
  41#include <linux/skbuff.h>
  42
  43#include <net/ip_vs.h>
  44
  45
  46/*
  47 *      IPVS SH bucket
  48 */
  49struct ip_vs_sh_bucket {
  50        struct ip_vs_dest       *dest;          /* real server (cache) */
  51};
  52
  53/*
  54 *     for IPVS SH entry hash table
  55 */
  56#ifndef CONFIG_IP_VS_SH_TAB_BITS
  57#define CONFIG_IP_VS_SH_TAB_BITS        8
  58#endif
  59#define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
  60#define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)
  61#define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)
  62
  63
  64/*
  65 *      Returns hash value for IPVS SH entry
  66 */
  67static inline unsigned ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
  68{
  69        __be32 addr_fold = addr->ip;
  70
  71#ifdef CONFIG_IP_VS_IPV6
  72        if (af == AF_INET6)
  73                addr_fold = addr->ip6[0]^addr->ip6[1]^
  74                            addr->ip6[2]^addr->ip6[3];
  75#endif
  76        return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK;
  77}
  78
  79
  80/*
  81 *      Get ip_vs_dest associated with supplied parameters.
  82 */
  83static inline struct ip_vs_dest *
  84ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
  85             const union nf_inet_addr *addr)
  86{
  87        return (tbl[ip_vs_sh_hashkey(af, addr)]).dest;
  88}
  89
  90
  91/*
  92 *      Assign all the hash buckets of the specified table with the service.
  93 */
  94static int
  95ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
  96{
  97        int i;
  98        struct ip_vs_sh_bucket *b;
  99        struct list_head *p;
 100        struct ip_vs_dest *dest;
 101
 102        b = tbl;
 103        p = &svc->destinations;
 104        for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
 105                if (list_empty(p)) {
 106                        b->dest = NULL;
 107                } else {
 108                        if (p == &svc->destinations)
 109                                p = p->next;
 110
 111                        dest = list_entry(p, struct ip_vs_dest, n_list);
 112                        atomic_inc(&dest->refcnt);
 113                        b->dest = dest;
 114
 115                        p = p->next;
 116                }
 117                b++;
 118        }
 119        return 0;
 120}
 121
 122
 123/*
 124 *      Flush all the hash buckets of the specified table.
 125 */
 126static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
 127{
 128        int i;
 129        struct ip_vs_sh_bucket *b;
 130
 131        b = tbl;
 132        for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
 133                if (b->dest) {
 134                        atomic_dec(&b->dest->refcnt);
 135                        b->dest = NULL;
 136                }
 137                b++;
 138        }
 139}
 140
 141
 142static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
 143{
 144        struct ip_vs_sh_bucket *tbl;
 145
 146        /* allocate the SH table for this service */
 147        tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
 148                      GFP_ATOMIC);
 149        if (tbl == NULL) {
 150                pr_err("%s(): no memory\n", __func__);
 151                return -ENOMEM;
 152        }
 153        svc->sched_data = tbl;
 154        IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
 155                  "current service\n",
 156                  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
 157
 158        /* assign the hash buckets with the updated service */
 159        ip_vs_sh_assign(tbl, svc);
 160
 161        return 0;
 162}
 163
 164
 165static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
 166{
 167        struct ip_vs_sh_bucket *tbl = svc->sched_data;
 168
 169        /* got to clean up hash buckets here */
 170        ip_vs_sh_flush(tbl);
 171
 172        /* release the table itself */
 173        kfree(svc->sched_data);
 174        IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
 175                  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
 176
 177        return 0;
 178}
 179
 180
 181static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
 182{
 183        struct ip_vs_sh_bucket *tbl = svc->sched_data;
 184
 185        /* got to clean up hash buckets here */
 186        ip_vs_sh_flush(tbl);
 187
 188        /* assign the hash buckets with the updated service */
 189        ip_vs_sh_assign(tbl, svc);
 190
 191        return 0;
 192}
 193
 194
 195/*
 196 *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
 197 *      consider that the server is overloaded here.
 198 */
 199static inline int is_overloaded(struct ip_vs_dest *dest)
 200{
 201        return dest->flags & IP_VS_DEST_F_OVERLOAD;
 202}
 203
 204
 205/*
 206 *      Source Hashing scheduling
 207 */
 208static struct ip_vs_dest *
 209ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 210{
 211        struct ip_vs_dest *dest;
 212        struct ip_vs_sh_bucket *tbl;
 213        struct ip_vs_iphdr iph;
 214
 215        ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 216
 217        IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 218
 219        tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
 220        dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr);
 221        if (!dest
 222            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 223            || atomic_read(&dest->weight) <= 0
 224            || is_overloaded(dest)) {
 225                IP_VS_ERR_RL("SH: no destination available\n");
 226                return NULL;
 227        }
 228
 229        IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
 230                      IP_VS_DBG_ADDR(svc->af, &iph.saddr),
 231                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 232                      ntohs(dest->port));
 233
 234        return dest;
 235}
 236
 237
 238/*
 239 *      IPVS SH Scheduler structure
 240 */
 241static struct ip_vs_scheduler ip_vs_sh_scheduler =
 242{
 243        .name =                 "sh",
 244        .refcnt =               ATOMIC_INIT(0),
 245        .module =               THIS_MODULE,
 246        .n_list  =              LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
 247        .init_service =         ip_vs_sh_init_svc,
 248        .done_service =         ip_vs_sh_done_svc,
 249        .update_service =       ip_vs_sh_update_svc,
 250        .schedule =             ip_vs_sh_schedule,
 251};
 252
 253
 254static int __init ip_vs_sh_init(void)
 255{
 256        return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
 257}
 258
 259
 260static void __exit ip_vs_sh_cleanup(void)
 261{
 262        unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
 263}
 264
 265
 266module_init(ip_vs_sh_init);
 267module_exit(ip_vs_sh_cleanup);
 268MODULE_LICENSE("GPL");
 269