linux/net/xfrm/xfrm_state.c
<<
>>
Prefs
   1/*
   2 * xfrm_state.c
   3 *
   4 * Changes:
   5 *      Mitsuru KANDA @USAGI
   6 *      Kazunori MIYAZAWA @USAGI
   7 *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
   8 *              IPv6 support
   9 *      YOSHIFUJI Hideaki @USAGI
  10 *              Split up af-specific functions
  11 *      Derek Atkins <derek@ihtfp.com>
  12 *              Add UDP Encapsulation
  13 *
  14 */
  15
  16#include <linux/workqueue.h>
  17#include <net/xfrm.h>
  18#include <linux/pfkeyv2.h>
  19#include <linux/ipsec.h>
  20#include <linux/module.h>
  21#include <linux/cache.h>
  22#include <linux/audit.h>
  23#include <linux/uaccess.h>
  24#include <linux/ktime.h>
  25#include <linux/slab.h>
  26#include <linux/interrupt.h>
  27#include <linux/kernel.h>
  28
  29#include "xfrm_hash.h"
  30
  31#define xfrm_state_deref_prot(table, net) \
  32        rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
  33
  34static void xfrm_state_gc_task(struct work_struct *work);
  35
  36/* Each xfrm_state may be linked to two tables:
  37
  38   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
  39   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
  40      destination/tunnel endpoint. (output)
  41 */
  42
  43static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
  44static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
  45
  46static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
  47static HLIST_HEAD(xfrm_state_gc_list);
  48
  49static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
  50{
  51        return refcount_inc_not_zero(&x->refcnt);
  52}
  53
  54static inline unsigned int xfrm_dst_hash(struct net *net,
  55                                         const xfrm_address_t *daddr,
  56                                         const xfrm_address_t *saddr,
  57                                         u32 reqid,
  58                                         unsigned short family)
  59{
  60        return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
  61}
  62
  63static inline unsigned int xfrm_src_hash(struct net *net,
  64                                         const xfrm_address_t *daddr,
  65                                         const xfrm_address_t *saddr,
  66                                         unsigned short family)
  67{
  68        return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
  69}
  70
  71static inline unsigned int
  72xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
  73              __be32 spi, u8 proto, unsigned short family)
  74{
  75        return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
  76}
  77
  78static void xfrm_hash_transfer(struct hlist_head *list,
  79                               struct hlist_head *ndsttable,
  80                               struct hlist_head *nsrctable,
  81                               struct hlist_head *nspitable,
  82                               unsigned int nhashmask)
  83{
  84        struct hlist_node *tmp;
  85        struct xfrm_state *x;
  86
  87        hlist_for_each_entry_safe(x, tmp, list, bydst) {
  88                unsigned int h;
  89
  90                h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
  91                                    x->props.reqid, x->props.family,
  92                                    nhashmask);
  93                hlist_add_head_rcu(&x->bydst, ndsttable + h);
  94
  95                h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
  96                                    x->props.family,
  97                                    nhashmask);
  98                hlist_add_head_rcu(&x->bysrc, nsrctable + h);
  99
 100                if (x->id.spi) {
 101                        h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
 102                                            x->id.proto, x->props.family,
 103                                            nhashmask);
 104                        hlist_add_head_rcu(&x->byspi, nspitable + h);
 105                }
 106        }
 107}
 108
 109static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
 110{
 111        return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
 112}
 113
 114static void xfrm_hash_resize(struct work_struct *work)
 115{
 116        struct net *net = container_of(work, struct net, xfrm.state_hash_work);
 117        struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
 118        unsigned long nsize, osize;
 119        unsigned int nhashmask, ohashmask;
 120        int i;
 121
 122        nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
 123        ndst = xfrm_hash_alloc(nsize);
 124        if (!ndst)
 125                return;
 126        nsrc = xfrm_hash_alloc(nsize);
 127        if (!nsrc) {
 128                xfrm_hash_free(ndst, nsize);
 129                return;
 130        }
 131        nspi = xfrm_hash_alloc(nsize);
 132        if (!nspi) {
 133                xfrm_hash_free(ndst, nsize);
 134                xfrm_hash_free(nsrc, nsize);
 135                return;
 136        }
 137
 138        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 139        write_seqcount_begin(&xfrm_state_hash_generation);
 140
 141        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
 142        odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
 143        for (i = net->xfrm.state_hmask; i >= 0; i--)
 144                xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
 145
 146        osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
 147        ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
 148        ohashmask = net->xfrm.state_hmask;
 149
 150        rcu_assign_pointer(net->xfrm.state_bydst, ndst);
 151        rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
 152        rcu_assign_pointer(net->xfrm.state_byspi, nspi);
 153        net->xfrm.state_hmask = nhashmask;
 154
 155        write_seqcount_end(&xfrm_state_hash_generation);
 156        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 157
 158        osize = (ohashmask + 1) * sizeof(struct hlist_head);
 159
 160        synchronize_rcu();
 161
 162        xfrm_hash_free(odst, osize);
 163        xfrm_hash_free(osrc, osize);
 164        xfrm_hash_free(ospi, osize);
 165}
 166
 167static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
 168static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
 169
 170static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 171
 172int __xfrm_state_delete(struct xfrm_state *x);
 173
 174int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
 175bool km_is_alive(const struct km_event *c);
 176void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 177
 178static DEFINE_SPINLOCK(xfrm_type_lock);
 179int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
 180{
 181        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 182        const struct xfrm_type **typemap;
 183        int err = 0;
 184
 185        if (unlikely(afinfo == NULL))
 186                return -EAFNOSUPPORT;
 187        typemap = afinfo->type_map;
 188        spin_lock_bh(&xfrm_type_lock);
 189
 190        if (likely(typemap[type->proto] == NULL))
 191                typemap[type->proto] = type;
 192        else
 193                err = -EEXIST;
 194        spin_unlock_bh(&xfrm_type_lock);
 195        rcu_read_unlock();
 196        return err;
 197}
 198EXPORT_SYMBOL(xfrm_register_type);
 199
 200int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
 201{
 202        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 203        const struct xfrm_type **typemap;
 204        int err = 0;
 205
 206        if (unlikely(afinfo == NULL))
 207                return -EAFNOSUPPORT;
 208        typemap = afinfo->type_map;
 209        spin_lock_bh(&xfrm_type_lock);
 210
 211        if (unlikely(typemap[type->proto] != type))
 212                err = -ENOENT;
 213        else
 214                typemap[type->proto] = NULL;
 215        spin_unlock_bh(&xfrm_type_lock);
 216        rcu_read_unlock();
 217        return err;
 218}
 219EXPORT_SYMBOL(xfrm_unregister_type);
 220
 221static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
 222{
 223        struct xfrm_state_afinfo *afinfo;
 224        const struct xfrm_type **typemap;
 225        const struct xfrm_type *type;
 226        int modload_attempted = 0;
 227
 228retry:
 229        afinfo = xfrm_state_get_afinfo(family);
 230        if (unlikely(afinfo == NULL))
 231                return NULL;
 232        typemap = afinfo->type_map;
 233
 234        type = READ_ONCE(typemap[proto]);
 235        if (unlikely(type && !try_module_get(type->owner)))
 236                type = NULL;
 237
 238        rcu_read_unlock();
 239
 240        if (!type && !modload_attempted) {
 241                request_module("xfrm-type-%d-%d", family, proto);
 242                modload_attempted = 1;
 243                goto retry;
 244        }
 245
 246        return type;
 247}
 248
 249static void xfrm_put_type(const struct xfrm_type *type)
 250{
 251        module_put(type->owner);
 252}
 253
 254static DEFINE_SPINLOCK(xfrm_type_offload_lock);
 255int xfrm_register_type_offload(const struct xfrm_type_offload *type,
 256                               unsigned short family)
 257{
 258        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 259        const struct xfrm_type_offload **typemap;
 260        int err = 0;
 261
 262        if (unlikely(afinfo == NULL))
 263                return -EAFNOSUPPORT;
 264        typemap = afinfo->type_offload_map;
 265        spin_lock_bh(&xfrm_type_offload_lock);
 266
 267        if (likely(typemap[type->proto] == NULL))
 268                typemap[type->proto] = type;
 269        else
 270                err = -EEXIST;
 271        spin_unlock_bh(&xfrm_type_offload_lock);
 272        rcu_read_unlock();
 273        return err;
 274}
 275EXPORT_SYMBOL(xfrm_register_type_offload);
 276
 277int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
 278                                 unsigned short family)
 279{
 280        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 281        const struct xfrm_type_offload **typemap;
 282        int err = 0;
 283
 284        if (unlikely(afinfo == NULL))
 285                return -EAFNOSUPPORT;
 286        typemap = afinfo->type_offload_map;
 287        spin_lock_bh(&xfrm_type_offload_lock);
 288
 289        if (unlikely(typemap[type->proto] != type))
 290                err = -ENOENT;
 291        else
 292                typemap[type->proto] = NULL;
 293        spin_unlock_bh(&xfrm_type_offload_lock);
 294        rcu_read_unlock();
 295        return err;
 296}
 297EXPORT_SYMBOL(xfrm_unregister_type_offload);
 298
 299static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
 300{
 301        struct xfrm_state_afinfo *afinfo;
 302        const struct xfrm_type_offload **typemap;
 303        const struct xfrm_type_offload *type;
 304
 305        afinfo = xfrm_state_get_afinfo(family);
 306        if (unlikely(afinfo == NULL))
 307                return NULL;
 308        typemap = afinfo->type_offload_map;
 309
 310        type = typemap[proto];
 311        if ((type && !try_module_get(type->owner)))
 312                type = NULL;
 313
 314        rcu_read_unlock();
 315        return type;
 316}
 317
 318static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
 319{
 320        module_put(type->owner);
 321}
 322
 323static DEFINE_SPINLOCK(xfrm_mode_lock);
 324int xfrm_register_mode(struct xfrm_mode *mode, int family)
 325{
 326        struct xfrm_state_afinfo *afinfo;
 327        struct xfrm_mode **modemap;
 328        int err;
 329
 330        if (unlikely(mode->encap >= XFRM_MODE_MAX))
 331                return -EINVAL;
 332
 333        afinfo = xfrm_state_get_afinfo(family);
 334        if (unlikely(afinfo == NULL))
 335                return -EAFNOSUPPORT;
 336
 337        err = -EEXIST;
 338        modemap = afinfo->mode_map;
 339        spin_lock_bh(&xfrm_mode_lock);
 340        if (modemap[mode->encap])
 341                goto out;
 342
 343        err = -ENOENT;
 344        if (!try_module_get(afinfo->owner))
 345                goto out;
 346
 347        mode->afinfo = afinfo;
 348        modemap[mode->encap] = mode;
 349        err = 0;
 350
 351out:
 352        spin_unlock_bh(&xfrm_mode_lock);
 353        rcu_read_unlock();
 354        return err;
 355}
 356EXPORT_SYMBOL(xfrm_register_mode);
 357
 358int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
 359{
 360        struct xfrm_state_afinfo *afinfo;
 361        struct xfrm_mode **modemap;
 362        int err;
 363
 364        if (unlikely(mode->encap >= XFRM_MODE_MAX))
 365                return -EINVAL;
 366
 367        afinfo = xfrm_state_get_afinfo(family);
 368        if (unlikely(afinfo == NULL))
 369                return -EAFNOSUPPORT;
 370
 371        err = -ENOENT;
 372        modemap = afinfo->mode_map;
 373        spin_lock_bh(&xfrm_mode_lock);
 374        if (likely(modemap[mode->encap] == mode)) {
 375                modemap[mode->encap] = NULL;
 376                module_put(mode->afinfo->owner);
 377                err = 0;
 378        }
 379
 380        spin_unlock_bh(&xfrm_mode_lock);
 381        rcu_read_unlock();
 382        return err;
 383}
 384EXPORT_SYMBOL(xfrm_unregister_mode);
 385
 386static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 387{
 388        struct xfrm_state_afinfo *afinfo;
 389        struct xfrm_mode *mode;
 390        int modload_attempted = 0;
 391
 392        if (unlikely(encap >= XFRM_MODE_MAX))
 393                return NULL;
 394
 395retry:
 396        afinfo = xfrm_state_get_afinfo(family);
 397        if (unlikely(afinfo == NULL))
 398                return NULL;
 399
 400        mode = READ_ONCE(afinfo->mode_map[encap]);
 401        if (unlikely(mode && !try_module_get(mode->owner)))
 402                mode = NULL;
 403
 404        rcu_read_unlock();
 405        if (!mode && !modload_attempted) {
 406                request_module("xfrm-mode-%d-%d", family, encap);
 407                modload_attempted = 1;
 408                goto retry;
 409        }
 410
 411        return mode;
 412}
 413
 414static void xfrm_put_mode(struct xfrm_mode *mode)
 415{
 416        module_put(mode->owner);
 417}
 418
 419static void xfrm_state_gc_destroy(struct xfrm_state *x)
 420{
 421        tasklet_hrtimer_cancel(&x->mtimer);
 422        del_timer_sync(&x->rtimer);
 423        kfree(x->aead);
 424        kfree(x->aalg);
 425        kfree(x->ealg);
 426        kfree(x->calg);
 427        kfree(x->encap);
 428        kfree(x->coaddr);
 429        kfree(x->replay_esn);
 430        kfree(x->preplay_esn);
 431        if (x->inner_mode)
 432                xfrm_put_mode(x->inner_mode);
 433        if (x->inner_mode_iaf)
 434                xfrm_put_mode(x->inner_mode_iaf);
 435        if (x->outer_mode)
 436                xfrm_put_mode(x->outer_mode);
 437        if (x->type_offload)
 438                xfrm_put_type_offload(x->type_offload);
 439        if (x->type) {
 440                x->type->destructor(x);
 441                xfrm_put_type(x->type);
 442        }
 443        xfrm_dev_state_free(x);
 444        security_xfrm_state_free(x);
 445        kfree(x);
 446}
 447
 448static void xfrm_state_gc_task(struct work_struct *work)
 449{
 450        struct xfrm_state *x;
 451        struct hlist_node *tmp;
 452        struct hlist_head gc_list;
 453
 454        spin_lock_bh(&xfrm_state_gc_lock);
 455        hlist_move_list(&xfrm_state_gc_list, &gc_list);
 456        spin_unlock_bh(&xfrm_state_gc_lock);
 457
 458        synchronize_rcu();
 459
 460        hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 461                xfrm_state_gc_destroy(x);
 462}
 463
 464static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 465{
 466        struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
 467        struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
 468        unsigned long now = get_seconds();
 469        long next = LONG_MAX;
 470        int warn = 0;
 471        int err = 0;
 472
 473        spin_lock(&x->lock);
 474        if (x->km.state == XFRM_STATE_DEAD)
 475                goto out;
 476        if (x->km.state == XFRM_STATE_EXPIRED)
 477                goto expired;
 478        if (x->lft.hard_add_expires_seconds) {
 479                long tmo = x->lft.hard_add_expires_seconds +
 480                        x->curlft.add_time - now;
 481                if (tmo <= 0) {
 482                        if (x->xflags & XFRM_SOFT_EXPIRE) {
 483                                /* enter hard expire without soft expire first?!
 484                                 * setting a new date could trigger this.
 485                                 * workaround: fix x->curflt.add_time by below:
 486                                 */
 487                                x->curlft.add_time = now - x->saved_tmo - 1;
 488                                tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
 489                        } else
 490                                goto expired;
 491                }
 492                if (tmo < next)
 493                        next = tmo;
 494        }
 495        if (x->lft.hard_use_expires_seconds) {
 496                long tmo = x->lft.hard_use_expires_seconds +
 497                        (x->curlft.use_time ? : now) - now;
 498                if (tmo <= 0)
 499                        goto expired;
 500                if (tmo < next)
 501                        next = tmo;
 502        }
 503        if (x->km.dying)
 504                goto resched;
 505        if (x->lft.soft_add_expires_seconds) {
 506                long tmo = x->lft.soft_add_expires_seconds +
 507                        x->curlft.add_time - now;
 508                if (tmo <= 0) {
 509                        warn = 1;
 510                        x->xflags &= ~XFRM_SOFT_EXPIRE;
 511                } else if (tmo < next) {
 512                        next = tmo;
 513                        x->xflags |= XFRM_SOFT_EXPIRE;
 514                        x->saved_tmo = tmo;
 515                }
 516        }
 517        if (x->lft.soft_use_expires_seconds) {
 518                long tmo = x->lft.soft_use_expires_seconds +
 519                        (x->curlft.use_time ? : now) - now;
 520                if (tmo <= 0)
 521                        warn = 1;
 522                else if (tmo < next)
 523                        next = tmo;
 524        }
 525
 526        x->km.dying = warn;
 527        if (warn)
 528                km_state_expired(x, 0, 0);
 529resched:
 530        if (next != LONG_MAX) {
 531                tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
 532        }
 533
 534        goto out;
 535
 536expired:
 537        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
 538                x->km.state = XFRM_STATE_EXPIRED;
 539
 540        err = __xfrm_state_delete(x);
 541        if (!err)
 542                km_state_expired(x, 1, 0);
 543
 544        xfrm_audit_state_delete(x, err ? 0 : 1, true);
 545
 546out:
 547        spin_unlock(&x->lock);
 548        return HRTIMER_NORESTART;
 549}
 550
 551static void xfrm_replay_timer_handler(unsigned long data);
 552
 553struct xfrm_state *xfrm_state_alloc(struct net *net)
 554{
 555        struct xfrm_state *x;
 556
 557        x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
 558
 559        if (x) {
 560                write_pnet(&x->xs_net, net);
 561                refcount_set(&x->refcnt, 1);
 562                atomic_set(&x->tunnel_users, 0);
 563                INIT_LIST_HEAD(&x->km.all);
 564                INIT_HLIST_NODE(&x->bydst);
 565                INIT_HLIST_NODE(&x->bysrc);
 566                INIT_HLIST_NODE(&x->byspi);
 567                tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
 568                                        CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
 569                setup_timer(&x->rtimer, xfrm_replay_timer_handler,
 570                                (unsigned long)x);
 571                x->curlft.add_time = get_seconds();
 572                x->lft.soft_byte_limit = XFRM_INF;
 573                x->lft.soft_packet_limit = XFRM_INF;
 574                x->lft.hard_byte_limit = XFRM_INF;
 575                x->lft.hard_packet_limit = XFRM_INF;
 576                x->replay_maxage = 0;
 577                x->replay_maxdiff = 0;
 578                x->inner_mode = NULL;
 579                x->inner_mode_iaf = NULL;
 580                spin_lock_init(&x->lock);
 581        }
 582        return x;
 583}
 584EXPORT_SYMBOL(xfrm_state_alloc);
 585
 586void __xfrm_state_destroy(struct xfrm_state *x)
 587{
 588        WARN_ON(x->km.state != XFRM_STATE_DEAD);
 589
 590        spin_lock_bh(&xfrm_state_gc_lock);
 591        hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 592        spin_unlock_bh(&xfrm_state_gc_lock);
 593        schedule_work(&xfrm_state_gc_work);
 594}
 595EXPORT_SYMBOL(__xfrm_state_destroy);
 596
 597int __xfrm_state_delete(struct xfrm_state *x)
 598{
 599        struct net *net = xs_net(x);
 600        int err = -ESRCH;
 601
 602        if (x->km.state != XFRM_STATE_DEAD) {
 603                x->km.state = XFRM_STATE_DEAD;
 604                spin_lock(&net->xfrm.xfrm_state_lock);
 605                list_del(&x->km.all);
 606                hlist_del_rcu(&x->bydst);
 607                hlist_del_rcu(&x->bysrc);
 608                if (x->id.spi)
 609                        hlist_del_rcu(&x->byspi);
 610                net->xfrm.state_num--;
 611                spin_unlock(&net->xfrm.xfrm_state_lock);
 612
 613                xfrm_dev_state_delete(x);
 614
 615                /* All xfrm_state objects are created by xfrm_state_alloc.
 616                 * The xfrm_state_alloc call gives a reference, and that
 617                 * is what we are dropping here.
 618                 */
 619                xfrm_state_put(x);
 620                err = 0;
 621        }
 622
 623        return err;
 624}
 625EXPORT_SYMBOL(__xfrm_state_delete);
 626
 627int xfrm_state_delete(struct xfrm_state *x)
 628{
 629        int err;
 630
 631        spin_lock_bh(&x->lock);
 632        err = __xfrm_state_delete(x);
 633        spin_unlock_bh(&x->lock);
 634
 635        return err;
 636}
 637EXPORT_SYMBOL(xfrm_state_delete);
 638
 639#ifdef CONFIG_SECURITY_NETWORK_XFRM
 640static inline int
 641xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 642{
 643        int i, err = 0;
 644
 645        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 646                struct xfrm_state *x;
 647
 648                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 649                        if (xfrm_id_proto_match(x->id.proto, proto) &&
 650                           (err = security_xfrm_state_delete(x)) != 0) {
 651                                xfrm_audit_state_delete(x, 0, task_valid);
 652                                return err;
 653                        }
 654                }
 655        }
 656
 657        return err;
 658}
 659
 660static inline int
 661xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
 662{
 663        int i, err = 0;
 664
 665        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 666                struct xfrm_state *x;
 667                struct xfrm_state_offload *xso;
 668
 669                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 670                        xso = &x->xso;
 671
 672                        if (xso->dev == dev &&
 673                           (err = security_xfrm_state_delete(x)) != 0) {
 674                                xfrm_audit_state_delete(x, 0, task_valid);
 675                                return err;
 676                        }
 677                }
 678        }
 679
 680        return err;
 681}
 682#else
 683static inline int
 684xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 685{
 686        return 0;
 687}
 688
 689static inline int
 690xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
 691{
 692        return 0;
 693}
 694#endif
 695
 696int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
 697{
 698        int i, err = 0, cnt = 0;
 699
 700        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 701        err = xfrm_state_flush_secctx_check(net, proto, task_valid);
 702        if (err)
 703                goto out;
 704
 705        err = -ESRCH;
 706        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 707                struct xfrm_state *x;
 708restart:
 709                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 710                        if (!xfrm_state_kern(x) &&
 711                            xfrm_id_proto_match(x->id.proto, proto)) {
 712                                xfrm_state_hold(x);
 713                                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 714
 715                                err = xfrm_state_delete(x);
 716                                xfrm_audit_state_delete(x, err ? 0 : 1,
 717                                                        task_valid);
 718                                xfrm_state_put(x);
 719                                if (!err)
 720                                        cnt++;
 721
 722                                spin_lock_bh(&net->xfrm.xfrm_state_lock);
 723                                goto restart;
 724                        }
 725                }
 726        }
 727        if (cnt)
 728                err = 0;
 729
 730out:
 731        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 732        return err;
 733}
 734EXPORT_SYMBOL(xfrm_state_flush);
 735
 736int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
 737{
 738        int i, err = 0, cnt = 0;
 739
 740        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 741        err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
 742        if (err)
 743                goto out;
 744
 745        err = -ESRCH;
 746        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 747                struct xfrm_state *x;
 748                struct xfrm_state_offload *xso;
 749restart:
 750                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 751                        xso = &x->xso;
 752
 753                        if (!xfrm_state_kern(x) && xso->dev == dev) {
 754                                xfrm_state_hold(x);
 755                                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 756
 757                                err = xfrm_state_delete(x);
 758                                xfrm_audit_state_delete(x, err ? 0 : 1,
 759                                                        task_valid);
 760                                xfrm_state_put(x);
 761                                if (!err)
 762                                        cnt++;
 763
 764                                spin_lock_bh(&net->xfrm.xfrm_state_lock);
 765                                goto restart;
 766                        }
 767                }
 768        }
 769        if (cnt)
 770                err = 0;
 771
 772out:
 773        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 774        return err;
 775}
 776EXPORT_SYMBOL(xfrm_dev_state_flush);
 777
 778void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 779{
 780        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 781        si->sadcnt = net->xfrm.state_num;
 782        si->sadhcnt = net->xfrm.state_hmask;
 783        si->sadhmcnt = xfrm_state_hashmax;
 784        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 785}
 786EXPORT_SYMBOL(xfrm_sad_getinfo);
 787
 788static void
 789xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 790                    const struct xfrm_tmpl *tmpl,
 791                    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 792                    unsigned short family)
 793{
 794        struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
 795
 796        if (!afinfo)
 797                return;
 798
 799        afinfo->init_tempsel(&x->sel, fl);
 800
 801        if (family != tmpl->encap_family) {
 802                afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
 803                if (!afinfo)
 804                        return;
 805        }
 806        afinfo->init_temprop(x, tmpl, daddr, saddr);
 807}
 808
 809static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 810                                              const xfrm_address_t *daddr,
 811                                              __be32 spi, u8 proto,
 812                                              unsigned short family)
 813{
 814        unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 815        struct xfrm_state *x;
 816
 817        hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
 818                if (x->props.family != family ||
 819                    x->id.spi       != spi ||
 820                    x->id.proto     != proto ||
 821                    !xfrm_addr_equal(&x->id.daddr, daddr, family))
 822                        continue;
 823
 824                if ((mark & x->mark.m) != x->mark.v)
 825                        continue;
 826                if (!xfrm_state_hold_rcu(x))
 827                        continue;
 828                return x;
 829        }
 830
 831        return NULL;
 832}
 833
 834static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 835                                                     const xfrm_address_t *daddr,
 836                                                     const xfrm_address_t *saddr,
 837                                                     u8 proto, unsigned short family)
 838{
 839        unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 840        struct xfrm_state *x;
 841
 842        hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
 843                if (x->props.family != family ||
 844                    x->id.proto     != proto ||
 845                    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
 846                    !xfrm_addr_equal(&x->props.saddr, saddr, family))
 847                        continue;
 848
 849                if ((mark & x->mark.m) != x->mark.v)
 850                        continue;
 851                if (!xfrm_state_hold_rcu(x))
 852                        continue;
 853                return x;
 854        }
 855
 856        return NULL;
 857}
 858
 859static inline struct xfrm_state *
 860__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
 861{
 862        struct net *net = xs_net(x);
 863        u32 mark = x->mark.v & x->mark.m;
 864
 865        if (use_spi)
 866                return __xfrm_state_lookup(net, mark, &x->id.daddr,
 867                                           x->id.spi, x->id.proto, family);
 868        else
 869                return __xfrm_state_lookup_byaddr(net, mark,
 870                                                  &x->id.daddr,
 871                                                  &x->props.saddr,
 872                                                  x->id.proto, family);
 873}
 874
 875static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
 876{
 877        if (have_hash_collision &&
 878            (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
 879            net->xfrm.state_num > net->xfrm.state_hmask)
 880                schedule_work(&net->xfrm.state_hash_work);
 881}
 882
 883static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 884                               const struct flowi *fl, unsigned short family,
 885                               struct xfrm_state **best, int *acq_in_progress,
 886                               int *error)
 887{
 888        /* Resolution logic:
 889         * 1. There is a valid state with matching selector. Done.
 890         * 2. Valid state with inappropriate selector. Skip.
 891         *
 892         * Entering area of "sysdeps".
 893         *
 894         * 3. If state is not valid, selector is temporary, it selects
 895         *    only session which triggered previous resolution. Key
 896         *    manager will do something to install a state with proper
 897         *    selector.
 898         */
 899        if (x->km.state == XFRM_STATE_VALID) {
 900                if ((x->sel.family &&
 901                     !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
 902                    !security_xfrm_state_pol_flow_match(x, pol, fl))
 903                        return;
 904
 905                if (!*best ||
 906                    (*best)->km.dying > x->km.dying ||
 907                    ((*best)->km.dying == x->km.dying &&
 908                     (*best)->curlft.add_time < x->curlft.add_time))
 909                        *best = x;
 910        } else if (x->km.state == XFRM_STATE_ACQ) {
 911                *acq_in_progress = 1;
 912        } else if (x->km.state == XFRM_STATE_ERROR ||
 913                   x->km.state == XFRM_STATE_EXPIRED) {
 914                if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
 915                    security_xfrm_state_pol_flow_match(x, pol, fl))
 916                        *error = -ESRCH;
 917        }
 918}
 919
 920struct xfrm_state *
 921xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 922                const struct flowi *fl, struct xfrm_tmpl *tmpl,
 923                struct xfrm_policy *pol, int *err,
 924                unsigned short family)
 925{
 926        static xfrm_address_t saddr_wildcard = { };
 927        struct net *net = xp_net(pol);
 928        unsigned int h, h_wildcard;
 929        struct xfrm_state *x, *x0, *to_put;
 930        int acquire_in_progress = 0;
 931        int error = 0;
 932        struct xfrm_state *best = NULL;
 933        u32 mark = pol->mark.v & pol->mark.m;
 934        unsigned short encap_family = tmpl->encap_family;
 935        unsigned int sequence;
 936        struct km_event c;
 937
 938        to_put = NULL;
 939
 940        sequence = read_seqcount_begin(&xfrm_state_hash_generation);
 941
 942        rcu_read_lock();
 943        h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 944        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 945                if (x->props.family == encap_family &&
 946                    x->props.reqid == tmpl->reqid &&
 947                    (mark & x->mark.m) == x->mark.v &&
 948                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 949                    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 950                    tmpl->mode == x->props.mode &&
 951                    tmpl->id.proto == x->id.proto &&
 952                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
 953                        xfrm_state_look_at(pol, x, fl, encap_family,
 954                                           &best, &acquire_in_progress, &error);
 955        }
 956        if (best || acquire_in_progress)
 957                goto found;
 958
 959        h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
 960        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
 961                if (x->props.family == encap_family &&
 962                    x->props.reqid == tmpl->reqid &&
 963                    (mark & x->mark.m) == x->mark.v &&
 964                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 965                    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
 966                    tmpl->mode == x->props.mode &&
 967                    tmpl->id.proto == x->id.proto &&
 968                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
 969                        xfrm_state_look_at(pol, x, fl, encap_family,
 970                                           &best, &acquire_in_progress, &error);
 971        }
 972
 973found:
 974        x = best;
 975        if (!x && !error && !acquire_in_progress) {
 976                if (tmpl->id.spi &&
 977                    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
 978                                              tmpl->id.proto, encap_family)) != NULL) {
 979                        to_put = x0;
 980                        error = -EEXIST;
 981                        goto out;
 982                }
 983
 984                c.net = net;
 985                /* If the KMs have no listeners (yet...), avoid allocating an SA
 986                 * for each and every packet - garbage collection might not
 987                 * handle the flood.
 988                 */
 989                if (!km_is_alive(&c)) {
 990                        error = -ESRCH;
 991                        goto out;
 992                }
 993
 994                x = xfrm_state_alloc(net);
 995                if (x == NULL) {
 996                        error = -ENOMEM;
 997                        goto out;
 998                }
 999                /* Initialize temporary state matching only
1000                 * to current session. */
1001                xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
1002                memcpy(&x->mark, &pol->mark, sizeof(x->mark));
1003
1004                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
1005                if (error) {
1006                        x->km.state = XFRM_STATE_DEAD;
1007                        to_put = x;
1008                        x = NULL;
1009                        goto out;
1010                }
1011
1012                if (km_query(x, tmpl, pol) == 0) {
1013                        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1014                        x->km.state = XFRM_STATE_ACQ;
1015                        list_add(&x->km.all, &net->xfrm.state_all);
1016                        hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1017                        h = xfrm_src_hash(net, daddr, saddr, encap_family);
1018                        hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1019                        if (x->id.spi) {
1020                                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
1021                                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1022                        }
1023                        x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1024                        tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1025                        net->xfrm.state_num++;
1026                        xfrm_hash_grow_check(net, x->bydst.next != NULL);
1027                        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1028                } else {
1029                        x->km.state = XFRM_STATE_DEAD;
1030                        to_put = x;
1031                        x = NULL;
1032                        error = -ESRCH;
1033                }
1034        }
1035out:
1036        if (x) {
1037                if (!xfrm_state_hold_rcu(x)) {
1038                        *err = -EAGAIN;
1039                        x = NULL;
1040                }
1041        } else {
1042                *err = acquire_in_progress ? -EAGAIN : error;
1043        }
1044        rcu_read_unlock();
1045        if (to_put)
1046                xfrm_state_put(to_put);
1047
1048        if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
1049                *err = -EAGAIN;
1050                if (x) {
1051                        xfrm_state_put(x);
1052                        x = NULL;
1053                }
1054        }
1055
1056        return x;
1057}
1058
1059struct xfrm_state *
1060xfrm_stateonly_find(struct net *net, u32 mark,
1061                    xfrm_address_t *daddr, xfrm_address_t *saddr,
1062                    unsigned short family, u8 mode, u8 proto, u32 reqid)
1063{
1064        unsigned int h;
1065        struct xfrm_state *rx = NULL, *x = NULL;
1066
1067        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1068        h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1069        hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1070                if (x->props.family == family &&
1071                    x->props.reqid == reqid &&
1072                    (mark & x->mark.m) == x->mark.v &&
1073                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
1074                    xfrm_state_addr_check(x, daddr, saddr, family) &&
1075                    mode == x->props.mode &&
1076                    proto == x->id.proto &&
1077                    x->km.state == XFRM_STATE_VALID) {
1078                        rx = x;
1079                        break;
1080                }
1081        }
1082
1083        if (rx)
1084                xfrm_state_hold(rx);
1085        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1086
1087
1088        return rx;
1089}
1090EXPORT_SYMBOL(xfrm_stateonly_find);
1091
1092struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
1093                                              unsigned short family)
1094{
1095        struct xfrm_state *x;
1096        struct xfrm_state_walk *w;
1097
1098        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1099        list_for_each_entry(w, &net->xfrm.state_all, all) {
1100                x = container_of(w, struct xfrm_state, km);
1101                if (x->props.family != family ||
1102                        x->id.spi != spi)
1103                        continue;
1104
1105                xfrm_state_hold(x);
1106                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1107                return x;
1108        }
1109        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1110        return NULL;
1111}
1112EXPORT_SYMBOL(xfrm_state_lookup_byspi);
1113
1114static void __xfrm_state_insert(struct xfrm_state *x)
1115{
1116        struct net *net = xs_net(x);
1117        unsigned int h;
1118
1119        list_add(&x->km.all, &net->xfrm.state_all);
1120
1121        h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
1122                          x->props.reqid, x->props.family);
1123        hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1124
1125        h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
1126        hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1127
1128        if (x->id.spi) {
1129                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
1130                                  x->props.family);
1131
1132                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1133        }
1134
1135        tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1136        if (x->replay_maxage)
1137                mod_timer(&x->rtimer, jiffies + x->replay_maxage);
1138
1139        net->xfrm.state_num++;
1140
1141        xfrm_hash_grow_check(net, x->bydst.next != NULL);
1142}
1143
1144/* net->xfrm.xfrm_state_lock is held */
1145static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
1146{
1147        struct net *net = xs_net(xnew);
1148        unsigned short family = xnew->props.family;
1149        u32 reqid = xnew->props.reqid;
1150        struct xfrm_state *x;
1151        unsigned int h;
1152        u32 mark = xnew->mark.v & xnew->mark.m;
1153
1154        h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
1155        hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1156                if (x->props.family     == family &&
1157                    x->props.reqid      == reqid &&
1158                    (mark & x->mark.m) == x->mark.v &&
1159                    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
1160                    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
1161                        x->genid++;
1162        }
1163}
1164
1165void xfrm_state_insert(struct xfrm_state *x)
1166{
1167        struct net *net = xs_net(x);
1168
1169        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1170        __xfrm_state_bump_genids(x);
1171        __xfrm_state_insert(x);
1172        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1173}
1174EXPORT_SYMBOL(xfrm_state_insert);
1175
1176/* net->xfrm.xfrm_state_lock is held */
1177static struct xfrm_state *__find_acq_core(struct net *net,
1178                                          const struct xfrm_mark *m,
1179                                          unsigned short family, u8 mode,
1180                                          u32 reqid, u8 proto,
1181                                          const xfrm_address_t *daddr,
1182                                          const xfrm_address_t *saddr,
1183                                          int create)
1184{
1185        unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1186        struct xfrm_state *x;
1187        u32 mark = m->v & m->m;
1188
1189        hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1190                if (x->props.reqid  != reqid ||
1191                    x->props.mode   != mode ||
1192                    x->props.family != family ||
1193                    x->km.state     != XFRM_STATE_ACQ ||
1194                    x->id.spi       != 0 ||
1195                    x->id.proto     != proto ||
1196                    (mark & x->mark.m) != x->mark.v ||
1197                    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1198                    !xfrm_addr_equal(&x->props.saddr, saddr, family))
1199                        continue;
1200
1201                xfrm_state_hold(x);
1202                return x;
1203        }
1204
1205        if (!create)
1206                return NULL;
1207
1208        x = xfrm_state_alloc(net);
1209        if (likely(x)) {
1210                switch (family) {
1211                case AF_INET:
1212                        x->sel.daddr.a4 = daddr->a4;
1213                        x->sel.saddr.a4 = saddr->a4;
1214                        x->sel.prefixlen_d = 32;
1215                        x->sel.prefixlen_s = 32;
1216                        x->props.saddr.a4 = saddr->a4;
1217                        x->id.daddr.a4 = daddr->a4;
1218                        break;
1219
1220                case AF_INET6:
1221                        x->sel.daddr.in6 = daddr->in6;
1222                        x->sel.saddr.in6 = saddr->in6;
1223                        x->sel.prefixlen_d = 128;
1224                        x->sel.prefixlen_s = 128;
1225                        x->props.saddr.in6 = saddr->in6;
1226                        x->id.daddr.in6 = daddr->in6;
1227                        break;
1228                }
1229
1230                x->km.state = XFRM_STATE_ACQ;
1231                x->id.proto = proto;
1232                x->props.family = family;
1233                x->props.mode = mode;
1234                x->props.reqid = reqid;
1235                x->mark.v = m->v;
1236                x->mark.m = m->m;
1237                x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1238                xfrm_state_hold(x);
1239                tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1240                list_add(&x->km.all, &net->xfrm.state_all);
1241                hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1242                h = xfrm_src_hash(net, daddr, saddr, family);
1243                hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1244
1245                net->xfrm.state_num++;
1246
1247                xfrm_hash_grow_check(net, x->bydst.next != NULL);
1248        }
1249
1250        return x;
1251}
1252
1253static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1254
1255int xfrm_state_add(struct xfrm_state *x)
1256{
1257        struct net *net = xs_net(x);
1258        struct xfrm_state *x1, *to_put;
1259        int family;
1260        int err;
1261        u32 mark = x->mark.v & x->mark.m;
1262        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1263
1264        family = x->props.family;
1265
1266        to_put = NULL;
1267
1268        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1269
1270        x1 = __xfrm_state_locate(x, use_spi, family);
1271        if (x1) {
1272                to_put = x1;
1273                x1 = NULL;
1274                err = -EEXIST;
1275                goto out;
1276        }
1277
1278        if (use_spi && x->km.seq) {
1279                x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
1280                if (x1 && ((x1->id.proto != x->id.proto) ||
1281                    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
1282                        to_put = x1;
1283                        x1 = NULL;
1284                }
1285        }
1286
1287        if (use_spi && !x1)
1288                x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
1289                                     x->props.reqid, x->id.proto,
1290                                     &x->id.daddr, &x->props.saddr, 0);
1291
1292        __xfrm_state_bump_genids(x);
1293        __xfrm_state_insert(x);
1294        err = 0;
1295
1296out:
1297        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1298
1299        if (x1) {
1300                xfrm_state_delete(x1);
1301                xfrm_state_put(x1);
1302        }
1303
1304        if (to_put)
1305                xfrm_state_put(to_put);
1306
1307        return err;
1308}
1309EXPORT_SYMBOL(xfrm_state_add);
1310
1311#ifdef CONFIG_XFRM_MIGRATE
1312static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1313                                           struct xfrm_encap_tmpl *encap)
1314{
1315        struct net *net = xs_net(orig);
1316        struct xfrm_state *x = xfrm_state_alloc(net);
1317        if (!x)
1318                goto out;
1319
1320        memcpy(&x->id, &orig->id, sizeof(x->id));
1321        memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1322        memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1323        x->props.mode = orig->props.mode;
1324        x->props.replay_window = orig->props.replay_window;
1325        x->props.reqid = orig->props.reqid;
1326        x->props.family = orig->props.family;
1327        x->props.saddr = orig->props.saddr;
1328
1329        if (orig->aalg) {
1330                x->aalg = xfrm_algo_auth_clone(orig->aalg);
1331                if (!x->aalg)
1332                        goto error;
1333        }
1334        x->props.aalgo = orig->props.aalgo;
1335
1336        if (orig->aead) {
1337                x->aead = xfrm_algo_aead_clone(orig->aead);
1338                if (!x->aead)
1339                        goto error;
1340        }
1341        if (orig->ealg) {
1342                x->ealg = xfrm_algo_clone(orig->ealg);
1343                if (!x->ealg)
1344                        goto error;
1345        }
1346        x->props.ealgo = orig->props.ealgo;
1347
1348        if (orig->calg) {
1349                x->calg = xfrm_algo_clone(orig->calg);
1350                if (!x->calg)
1351                        goto error;
1352        }
1353        x->props.calgo = orig->props.calgo;
1354
1355        if (encap || orig->encap) {
1356                if (encap)
1357                        x->encap = kmemdup(encap, sizeof(*x->encap),
1358                                        GFP_KERNEL);
1359                else
1360                        x->encap = kmemdup(orig->encap, sizeof(*x->encap),
1361                                        GFP_KERNEL);
1362
1363                if (!x->encap)
1364                        goto error;
1365        }
1366
1367        if (orig->coaddr) {
1368                x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1369                                    GFP_KERNEL);
1370                if (!x->coaddr)
1371                        goto error;
1372        }
1373
1374        if (orig->replay_esn) {
1375                if (xfrm_replay_clone(x, orig))
1376                        goto error;
1377        }
1378
1379        memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1380
1381        if (xfrm_init_state(x) < 0)
1382                goto error;
1383
1384        x->props.flags = orig->props.flags;
1385        x->props.extra_flags = orig->props.extra_flags;
1386
1387        x->tfcpad = orig->tfcpad;
1388        x->replay_maxdiff = orig->replay_maxdiff;
1389        x->replay_maxage = orig->replay_maxage;
1390        x->curlft.add_time = orig->curlft.add_time;
1391        x->km.state = orig->km.state;
1392        x->km.seq = orig->km.seq;
1393        x->replay = orig->replay;
1394        x->preplay = orig->preplay;
1395
1396        return x;
1397
1398 error:
1399        xfrm_state_put(x);
1400out:
1401        return NULL;
1402}
1403
1404struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
1405{
1406        unsigned int h;
1407        struct xfrm_state *x = NULL;
1408
1409        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1410
1411        if (m->reqid) {
1412                h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
1413                                  m->reqid, m->old_family);
1414                hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1415                        if (x->props.mode != m->mode ||
1416                            x->id.proto != m->proto)
1417                                continue;
1418                        if (m->reqid && x->props.reqid != m->reqid)
1419                                continue;
1420                        if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1421                                             m->old_family) ||
1422                            !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1423                                             m->old_family))
1424                                continue;
1425                        xfrm_state_hold(x);
1426                        break;
1427                }
1428        } else {
1429                h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
1430                                  m->old_family);
1431                hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
1432                        if (x->props.mode != m->mode ||
1433                            x->id.proto != m->proto)
1434                                continue;
1435                        if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1436                                             m->old_family) ||
1437                            !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1438                                             m->old_family))
1439                                continue;
1440                        xfrm_state_hold(x);
1441                        break;
1442                }
1443        }
1444
1445        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1446
1447        return x;
1448}
1449EXPORT_SYMBOL(xfrm_migrate_state_find);
1450
1451struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
1452                                      struct xfrm_migrate *m,
1453                                      struct xfrm_encap_tmpl *encap)
1454{
1455        struct xfrm_state *xc;
1456
1457        xc = xfrm_state_clone(x, encap);
1458        if (!xc)
1459                return NULL;
1460
1461        memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1462        memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1463
1464        /* add state */
1465        if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
1466                /* a care is needed when the destination address of the
1467                   state is to be updated as it is a part of triplet */
1468                xfrm_state_insert(xc);
1469        } else {
1470                if (xfrm_state_add(xc) < 0)
1471                        goto error;
1472        }
1473
1474        return xc;
1475error:
1476        xfrm_state_put(xc);
1477        return NULL;
1478}
1479EXPORT_SYMBOL(xfrm_state_migrate);
1480#endif
1481
1482int xfrm_state_update(struct xfrm_state *x)
1483{
1484        struct xfrm_state *x1, *to_put;
1485        int err;
1486        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1487        struct net *net = xs_net(x);
1488
1489        to_put = NULL;
1490
1491        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1492        x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1493
1494        err = -ESRCH;
1495        if (!x1)
1496                goto out;
1497
1498        if (xfrm_state_kern(x1)) {
1499                to_put = x1;
1500                err = -EEXIST;
1501                goto out;
1502        }
1503
1504        if (x1->km.state == XFRM_STATE_ACQ) {
1505                __xfrm_state_insert(x);
1506                x = NULL;
1507        }
1508        err = 0;
1509
1510out:
1511        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1512
1513        if (to_put)
1514                xfrm_state_put(to_put);
1515
1516        if (err)
1517                return err;
1518
1519        if (!x) {
1520                xfrm_state_delete(x1);
1521                xfrm_state_put(x1);
1522                return 0;
1523        }
1524
1525        err = -EINVAL;
1526        spin_lock_bh(&x1->lock);
1527        if (likely(x1->km.state == XFRM_STATE_VALID)) {
1528                if (x->encap && x1->encap)
1529                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1530                if (x->coaddr && x1->coaddr) {
1531                        memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1532                }
1533                if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1534                        memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1535                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1536                x1->km.dying = 0;
1537
1538                tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1539                if (x1->curlft.use_time)
1540                        xfrm_state_check_expire(x1);
1541
1542                err = 0;
1543                x->km.state = XFRM_STATE_DEAD;
1544                __xfrm_state_put(x);
1545        }
1546        spin_unlock_bh(&x1->lock);
1547
1548        xfrm_state_put(x1);
1549
1550        return err;
1551}
1552EXPORT_SYMBOL(xfrm_state_update);
1553
1554int xfrm_state_check_expire(struct xfrm_state *x)
1555{
1556        if (!x->curlft.use_time)
1557                x->curlft.use_time = get_seconds();
1558
1559        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1560            x->curlft.packets >= x->lft.hard_packet_limit) {
1561                x->km.state = XFRM_STATE_EXPIRED;
1562                tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
1563                return -EINVAL;
1564        }
1565
1566        if (!x->km.dying &&
1567            (x->curlft.bytes >= x->lft.soft_byte_limit ||
1568             x->curlft.packets >= x->lft.soft_packet_limit)) {
1569                x->km.dying = 1;
1570                km_state_expired(x, 0, 0);
1571        }
1572        return 0;
1573}
1574EXPORT_SYMBOL(xfrm_state_check_expire);
1575
1576struct xfrm_state *
1577xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
1578                  u8 proto, unsigned short family)
1579{
1580        struct xfrm_state *x;
1581
1582        rcu_read_lock();
1583        x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1584        rcu_read_unlock();
1585        return x;
1586}
1587EXPORT_SYMBOL(xfrm_state_lookup);
1588
1589struct xfrm_state *
1590xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1591                         const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1592                         u8 proto, unsigned short family)
1593{
1594        struct xfrm_state *x;
1595
1596        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1597        x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
1598        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1599        return x;
1600}
1601EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1602
1603struct xfrm_state *
1604xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1605              u8 proto, const xfrm_address_t *daddr,
1606              const xfrm_address_t *saddr, int create, unsigned short family)
1607{
1608        struct xfrm_state *x;
1609
1610        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1611        x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create);
1612        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1613
1614        return x;
1615}
1616EXPORT_SYMBOL(xfrm_find_acq);
1617
1618#ifdef CONFIG_XFRM_SUB_POLICY
1619int
1620xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1621               unsigned short family, struct net *net)
1622{
1623        int i;
1624        int err = 0;
1625        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1626        if (!afinfo)
1627                return -EAFNOSUPPORT;
1628
1629        spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
1630        if (afinfo->tmpl_sort)
1631                err = afinfo->tmpl_sort(dst, src, n);
1632        else
1633                for (i = 0; i < n; i++)
1634                        dst[i] = src[i];
1635        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1636        rcu_read_unlock();
1637        return err;
1638}
1639EXPORT_SYMBOL(xfrm_tmpl_sort);
1640
1641int
1642xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1643                unsigned short family)
1644{
1645        int i;
1646        int err = 0;
1647        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1648        struct net *net = xs_net(*src);
1649
1650        if (!afinfo)
1651                return -EAFNOSUPPORT;
1652
1653        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1654        if (afinfo->state_sort)
1655                err = afinfo->state_sort(dst, src, n);
1656        else
1657                for (i = 0; i < n; i++)
1658                        dst[i] = src[i];
1659        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1660        rcu_read_unlock();
1661        return err;
1662}
1663EXPORT_SYMBOL(xfrm_state_sort);
1664#endif
1665
1666/* Silly enough, but I'm lazy to build resolution list */
1667
1668static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1669{
1670        int i;
1671
1672        for (i = 0; i <= net->xfrm.state_hmask; i++) {
1673                struct xfrm_state *x;
1674
1675                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
1676                        if (x->km.seq == seq &&
1677                            (mark & x->mark.m) == x->mark.v &&
1678                            x->km.state == XFRM_STATE_ACQ) {
1679                                xfrm_state_hold(x);
1680                                return x;
1681                        }
1682                }
1683        }
1684        return NULL;
1685}
1686
1687struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1688{
1689        struct xfrm_state *x;
1690
1691        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1692        x = __xfrm_find_acq_byseq(net, mark, seq);
1693        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1694        return x;
1695}
1696EXPORT_SYMBOL(xfrm_find_acq_byseq);
1697
1698u32 xfrm_get_acqseq(void)
1699{
1700        u32 res;
1701        static atomic_t acqseq;
1702
1703        do {
1704                res = atomic_inc_return(&acqseq);
1705        } while (!res);
1706
1707        return res;
1708}
1709EXPORT_SYMBOL(xfrm_get_acqseq);
1710
1711int verify_spi_info(u8 proto, u32 min, u32 max)
1712{
1713        switch (proto) {
1714        case IPPROTO_AH:
1715        case IPPROTO_ESP:
1716                break;
1717
1718        case IPPROTO_COMP:
1719                /* IPCOMP spi is 16-bits. */
1720                if (max >= 0x10000)
1721                        return -EINVAL;
1722                break;
1723
1724        default:
1725                return -EINVAL;
1726        }
1727
1728        if (min > max)
1729                return -EINVAL;
1730
1731        return 0;
1732}
1733EXPORT_SYMBOL(verify_spi_info);
1734
1735int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
1736{
1737        struct net *net = xs_net(x);
1738        unsigned int h;
1739        struct xfrm_state *x0;
1740        int err = -ENOENT;
1741        __be32 minspi = htonl(low);
1742        __be32 maxspi = htonl(high);
1743        u32 mark = x->mark.v & x->mark.m;
1744
1745        spin_lock_bh(&x->lock);
1746        if (x->km.state == XFRM_STATE_DEAD)
1747                goto unlock;
1748
1749        err = 0;
1750        if (x->id.spi)
1751                goto unlock;
1752
1753        err = -ENOENT;
1754
1755        if (minspi == maxspi) {
1756                x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
1757                if (x0) {
1758                        xfrm_state_put(x0);
1759                        goto unlock;
1760                }
1761                x->id.spi = minspi;
1762        } else {
1763                u32 spi = 0;
1764                for (h = 0; h < high-low+1; h++) {
1765                        spi = low + prandom_u32()%(high-low+1);
1766                        x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1767                        if (x0 == NULL) {
1768                                x->id.spi = htonl(spi);
1769                                break;
1770                        }
1771                        xfrm_state_put(x0);
1772                }
1773        }
1774        if (x->id.spi) {
1775                spin_lock_bh(&net->xfrm.xfrm_state_lock);
1776                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1777                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1778                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1779
1780                err = 0;
1781        }
1782
1783unlock:
1784        spin_unlock_bh(&x->lock);
1785
1786        return err;
1787}
1788EXPORT_SYMBOL(xfrm_alloc_spi);
1789
1790static bool __xfrm_state_filter_match(struct xfrm_state *x,
1791                                      struct xfrm_address_filter *filter)
1792{
1793        if (filter) {
1794                if ((filter->family == AF_INET ||
1795                     filter->family == AF_INET6) &&
1796                    x->props.family != filter->family)
1797                        return false;
1798
1799                return addr_match(&x->props.saddr, &filter->saddr,
1800                                  filter->splen) &&
1801                       addr_match(&x->id.daddr, &filter->daddr,
1802                                  filter->dplen);
1803        }
1804        return true;
1805}
1806
1807int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
1808                    int (*func)(struct xfrm_state *, int, void*),
1809                    void *data)
1810{
1811        struct xfrm_state *state;
1812        struct xfrm_state_walk *x;
1813        int err = 0;
1814
1815        if (walk->seq != 0 && list_empty(&walk->all))
1816                return 0;
1817
1818        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1819        if (list_empty(&walk->all))
1820                x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
1821        else
1822                x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
1823        list_for_each_entry_from(x, &net->xfrm.state_all, all) {
1824                if (x->state == XFRM_STATE_DEAD)
1825                        continue;
1826                state = container_of(x, struct xfrm_state, km);
1827                if (!xfrm_id_proto_match(state->id.proto, walk->proto))
1828                        continue;
1829                if (!__xfrm_state_filter_match(state, walk->filter))
1830                        continue;
1831                err = func(state, walk->seq, data);
1832                if (err) {
1833                        list_move_tail(&walk->all, &x->all);
1834                        goto out;
1835                }
1836                walk->seq++;
1837        }
1838        if (walk->seq == 0) {
1839                err = -ENOENT;
1840                goto out;
1841        }
1842        list_del_init(&walk->all);
1843out:
1844        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1845        return err;
1846}
1847EXPORT_SYMBOL(xfrm_state_walk);
1848
1849void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
1850                          struct xfrm_address_filter *filter)
1851{
1852        INIT_LIST_HEAD(&walk->all);
1853        walk->proto = proto;
1854        walk->state = XFRM_STATE_DEAD;
1855        walk->seq = 0;
1856        walk->filter = filter;
1857}
1858EXPORT_SYMBOL(xfrm_state_walk_init);
1859
1860void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
1861{
1862        kfree(walk->filter);
1863
1864        if (list_empty(&walk->all))
1865                return;
1866
1867        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1868        list_del(&walk->all);
1869        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1870}
1871EXPORT_SYMBOL(xfrm_state_walk_done);
1872
1873static void xfrm_replay_timer_handler(unsigned long data)
1874{
1875        struct xfrm_state *x = (struct xfrm_state *)data;
1876
1877        spin_lock(&x->lock);
1878
1879        if (x->km.state == XFRM_STATE_VALID) {
1880                if (xfrm_aevent_is_on(xs_net(x)))
1881                        x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
1882                else
1883                        x->xflags |= XFRM_TIME_DEFER;
1884        }
1885
1886        spin_unlock(&x->lock);
1887}
1888
1889static LIST_HEAD(xfrm_km_list);
1890
1891void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
1892{
1893        struct xfrm_mgr *km;
1894
1895        rcu_read_lock();
1896        list_for_each_entry_rcu(km, &xfrm_km_list, list)
1897                if (km->notify_policy)
1898                        km->notify_policy(xp, dir, c);
1899        rcu_read_unlock();
1900}
1901
1902void km_state_notify(struct xfrm_state *x, const struct km_event *c)
1903{
1904        struct xfrm_mgr *km;
1905        rcu_read_lock();
1906        list_for_each_entry_rcu(km, &xfrm_km_list, list)
1907                if (km->notify)
1908                        km->notify(x, c);
1909        rcu_read_unlock();
1910}
1911
1912EXPORT_SYMBOL(km_policy_notify);
1913EXPORT_SYMBOL(km_state_notify);
1914
1915void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
1916{
1917        struct km_event c;
1918
1919        c.data.hard = hard;
1920        c.portid = portid;
1921        c.event = XFRM_MSG_EXPIRE;
1922        km_state_notify(x, &c);
1923}
1924
1925EXPORT_SYMBOL(km_state_expired);
1926/*
1927 * We send to all registered managers regardless of failure
1928 * We are happy with one success
1929*/
1930int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1931{
1932        int err = -EINVAL, acqret;
1933        struct xfrm_mgr *km;
1934
1935        rcu_read_lock();
1936        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1937                acqret = km->acquire(x, t, pol);
1938                if (!acqret)
1939                        err = acqret;
1940        }
1941        rcu_read_unlock();
1942        return err;
1943}
1944EXPORT_SYMBOL(km_query);
1945
1946int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1947{
1948        int err = -EINVAL;
1949        struct xfrm_mgr *km;
1950
1951        rcu_read_lock();
1952        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1953                if (km->new_mapping)
1954                        err = km->new_mapping(x, ipaddr, sport);
1955                if (!err)
1956                        break;
1957        }
1958        rcu_read_unlock();
1959        return err;
1960}
1961EXPORT_SYMBOL(km_new_mapping);
1962
1963void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
1964{
1965        struct km_event c;
1966
1967        c.data.hard = hard;
1968        c.portid = portid;
1969        c.event = XFRM_MSG_POLEXPIRE;
1970        km_policy_notify(pol, dir, &c);
1971}
1972EXPORT_SYMBOL(km_policy_expired);
1973
1974#ifdef CONFIG_XFRM_MIGRATE
1975int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
1976               const struct xfrm_migrate *m, int num_migrate,
1977               const struct xfrm_kmaddress *k,
1978               const struct xfrm_encap_tmpl *encap)
1979{
1980        int err = -EINVAL;
1981        int ret;
1982        struct xfrm_mgr *km;
1983
1984        rcu_read_lock();
1985        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1986                if (km->migrate) {
1987                        ret = km->migrate(sel, dir, type, m, num_migrate, k,
1988                                          encap);
1989                        if (!ret)
1990                                err = ret;
1991                }
1992        }
1993        rcu_read_unlock();
1994        return err;
1995}
1996EXPORT_SYMBOL(km_migrate);
1997#endif
1998
1999int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
2000{
2001        int err = -EINVAL;
2002        int ret;
2003        struct xfrm_mgr *km;
2004
2005        rcu_read_lock();
2006        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2007                if (km->report) {
2008                        ret = km->report(net, proto, sel, addr);
2009                        if (!ret)
2010                                err = ret;
2011                }
2012        }
2013        rcu_read_unlock();
2014        return err;
2015}
2016EXPORT_SYMBOL(km_report);
2017
2018bool km_is_alive(const struct km_event *c)
2019{
2020        struct xfrm_mgr *km;
2021        bool is_alive = false;
2022
2023        rcu_read_lock();
2024        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2025                if (km->is_alive && km->is_alive(c)) {
2026                        is_alive = true;
2027                        break;
2028                }
2029        }
2030        rcu_read_unlock();
2031
2032        return is_alive;
2033}
2034EXPORT_SYMBOL(km_is_alive);
2035
2036int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
2037{
2038        int err;
2039        u8 *data;
2040        struct xfrm_mgr *km;
2041        struct xfrm_policy *pol = NULL;
2042
2043        if (optlen <= 0 || optlen > PAGE_SIZE)
2044                return -EMSGSIZE;
2045
2046        data = memdup_user(optval, optlen);
2047        if (IS_ERR(data))
2048                return PTR_ERR(data);
2049
2050        err = -EINVAL;
2051        rcu_read_lock();
2052        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2053                pol = km->compile_policy(sk, optname, data,
2054                                         optlen, &err);
2055                if (err >= 0)
2056                        break;
2057        }
2058        rcu_read_unlock();
2059
2060        if (err >= 0) {
2061                xfrm_sk_policy_insert(sk, err, pol);
2062                xfrm_pol_put(pol);
2063                err = 0;
2064        }
2065
2066        kfree(data);
2067        return err;
2068}
2069EXPORT_SYMBOL(xfrm_user_policy);
2070
2071static DEFINE_SPINLOCK(xfrm_km_lock);
2072
2073int xfrm_register_km(struct xfrm_mgr *km)
2074{
2075        spin_lock_bh(&xfrm_km_lock);
2076        list_add_tail_rcu(&km->list, &xfrm_km_list);
2077        spin_unlock_bh(&xfrm_km_lock);
2078        return 0;
2079}
2080EXPORT_SYMBOL(xfrm_register_km);
2081
2082int xfrm_unregister_km(struct xfrm_mgr *km)
2083{
2084        spin_lock_bh(&xfrm_km_lock);
2085        list_del_rcu(&km->list);
2086        spin_unlock_bh(&xfrm_km_lock);
2087        synchronize_rcu();
2088        return 0;
2089}
2090EXPORT_SYMBOL(xfrm_unregister_km);
2091
2092int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
2093{
2094        int err = 0;
2095
2096        if (WARN_ON(afinfo->family >= NPROTO))
2097                return -EAFNOSUPPORT;
2098
2099        spin_lock_bh(&xfrm_state_afinfo_lock);
2100        if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
2101                err = -EEXIST;
2102        else
2103                rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
2104        spin_unlock_bh(&xfrm_state_afinfo_lock);
2105        return err;
2106}
2107EXPORT_SYMBOL(xfrm_state_register_afinfo);
2108
2109int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
2110{
2111        int err = 0, family = afinfo->family;
2112
2113        if (WARN_ON(family >= NPROTO))
2114                return -EAFNOSUPPORT;
2115
2116        spin_lock_bh(&xfrm_state_afinfo_lock);
2117        if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
2118                if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
2119                        err = -EINVAL;
2120                else
2121                        RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
2122        }
2123        spin_unlock_bh(&xfrm_state_afinfo_lock);
2124        synchronize_rcu();
2125        return err;
2126}
2127EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
2128
2129struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
2130{
2131        if (unlikely(family >= NPROTO))
2132                return NULL;
2133
2134        return rcu_dereference(xfrm_state_afinfo[family]);
2135}
2136
2137struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
2138{
2139        struct xfrm_state_afinfo *afinfo;
2140        if (unlikely(family >= NPROTO))
2141                return NULL;
2142        rcu_read_lock();
2143        afinfo = rcu_dereference(xfrm_state_afinfo[family]);
2144        if (unlikely(!afinfo))
2145                rcu_read_unlock();
2146        return afinfo;
2147}
2148
2149/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
2150void xfrm_state_delete_tunnel(struct xfrm_state *x)
2151{
2152        if (x->tunnel) {
2153                struct xfrm_state *t = x->tunnel;
2154
2155                if (atomic_read(&t->tunnel_users) == 2)
2156                        xfrm_state_delete(t);
2157                atomic_dec(&t->tunnel_users);
2158                xfrm_state_put(t);
2159                x->tunnel = NULL;
2160        }
2161}
2162EXPORT_SYMBOL(xfrm_state_delete_tunnel);
2163
2164int xfrm_state_mtu(struct xfrm_state *x, int mtu)
2165{
2166        const struct xfrm_type *type = READ_ONCE(x->type);
2167
2168        if (x->km.state == XFRM_STATE_VALID &&
2169            type && type->get_mtu)
2170                return type->get_mtu(x, mtu);
2171
2172        return mtu - x->props.header_len;
2173}
2174
2175int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
2176{
2177        struct xfrm_state_afinfo *afinfo;
2178        struct xfrm_mode *inner_mode;
2179        int family = x->props.family;
2180        int err;
2181
2182        err = -EAFNOSUPPORT;
2183        afinfo = xfrm_state_get_afinfo(family);
2184        if (!afinfo)
2185                goto error;
2186
2187        err = 0;
2188        if (afinfo->init_flags)
2189                err = afinfo->init_flags(x);
2190
2191        rcu_read_unlock();
2192
2193        if (err)
2194                goto error;
2195
2196        err = -EPROTONOSUPPORT;
2197
2198        if (x->sel.family != AF_UNSPEC) {
2199                inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
2200                if (inner_mode == NULL)
2201                        goto error;
2202
2203                if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2204                    family != x->sel.family) {
2205                        xfrm_put_mode(inner_mode);
2206                        goto error;
2207                }
2208
2209                x->inner_mode = inner_mode;
2210        } else {
2211                struct xfrm_mode *inner_mode_iaf;
2212                int iafamily = AF_INET;
2213
2214                inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
2215                if (inner_mode == NULL)
2216                        goto error;
2217
2218                if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
2219                        xfrm_put_mode(inner_mode);
2220                        goto error;
2221                }
2222                x->inner_mode = inner_mode;
2223
2224                if (x->props.family == AF_INET)
2225                        iafamily = AF_INET6;
2226
2227                inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
2228                if (inner_mode_iaf) {
2229                        if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
2230                                x->inner_mode_iaf = inner_mode_iaf;
2231                        else
2232                                xfrm_put_mode(inner_mode_iaf);
2233                }
2234        }
2235
2236        x->type = xfrm_get_type(x->id.proto, family);
2237        if (x->type == NULL)
2238                goto error;
2239
2240        x->type_offload = xfrm_get_type_offload(x->id.proto, family);
2241
2242        err = x->type->init_state(x);
2243        if (err)
2244                goto error;
2245
2246        x->outer_mode = xfrm_get_mode(x->props.mode, family);
2247        if (x->outer_mode == NULL) {
2248                err = -EPROTONOSUPPORT;
2249                goto error;
2250        }
2251
2252        if (init_replay) {
2253                err = xfrm_init_replay(x);
2254                if (err)
2255                        goto error;
2256        }
2257
2258        x->km.state = XFRM_STATE_VALID;
2259
2260error:
2261        return err;
2262}
2263
2264EXPORT_SYMBOL(__xfrm_init_state);
2265
2266int xfrm_init_state(struct xfrm_state *x)
2267{
2268        return __xfrm_init_state(x, true);
2269}
2270
2271EXPORT_SYMBOL(xfrm_init_state);
2272
2273int __net_init xfrm_state_init(struct net *net)
2274{
2275        unsigned int sz;
2276
2277        INIT_LIST_HEAD(&net->xfrm.state_all);
2278
2279        sz = sizeof(struct hlist_head) * 8;
2280
2281        net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2282        if (!net->xfrm.state_bydst)
2283                goto out_bydst;
2284        net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2285        if (!net->xfrm.state_bysrc)
2286                goto out_bysrc;
2287        net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2288        if (!net->xfrm.state_byspi)
2289                goto out_byspi;
2290        net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2291
2292        net->xfrm.state_num = 0;
2293        INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2294        spin_lock_init(&net->xfrm.xfrm_state_lock);
2295        return 0;
2296
2297out_byspi:
2298        xfrm_hash_free(net->xfrm.state_bysrc, sz);
2299out_bysrc:
2300        xfrm_hash_free(net->xfrm.state_bydst, sz);
2301out_bydst:
2302        return -ENOMEM;
2303}
2304
2305void xfrm_state_fini(struct net *net)
2306{
2307        unsigned int sz;
2308
2309        flush_work(&net->xfrm.state_hash_work);
2310        xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
2311        flush_work(&xfrm_state_gc_work);
2312
2313        WARN_ON(!list_empty(&net->xfrm.state_all));
2314
2315        sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2316        WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2317        xfrm_hash_free(net->xfrm.state_byspi, sz);
2318        WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2319        xfrm_hash_free(net->xfrm.state_bysrc, sz);
2320        WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2321        xfrm_hash_free(net->xfrm.state_bydst, sz);
2322}
2323
2324#ifdef CONFIG_AUDITSYSCALL
2325static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2326                                     struct audit_buffer *audit_buf)
2327{
2328        struct xfrm_sec_ctx *ctx = x->security;
2329        u32 spi = ntohl(x->id.spi);
2330
2331        if (ctx)
2332                audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2333                                 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2334
2335        switch (x->props.family) {
2336        case AF_INET:
2337                audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2338                                 &x->props.saddr.a4, &x->id.daddr.a4);
2339                break;
2340        case AF_INET6:
2341                audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2342                                 x->props.saddr.a6, x->id.daddr.a6);
2343                break;
2344        }
2345
2346        audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2347}
2348
2349static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2350                                      struct audit_buffer *audit_buf)
2351{
2352        const struct iphdr *iph4;
2353        const struct ipv6hdr *iph6;
2354
2355        switch (family) {
2356        case AF_INET:
2357                iph4 = ip_hdr(skb);
2358                audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2359                                 &iph4->saddr, &iph4->daddr);
2360                break;
2361        case AF_INET6:
2362                iph6 = ipv6_hdr(skb);
2363                audit_log_format(audit_buf,
2364                                 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2365                                 &iph6->saddr, &iph6->daddr,
2366                                 iph6->flow_lbl[0] & 0x0f,
2367                                 iph6->flow_lbl[1],
2368                                 iph6->flow_lbl[2]);
2369                break;
2370        }
2371}
2372
2373void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
2374{
2375        struct audit_buffer *audit_buf;
2376
2377        audit_buf = xfrm_audit_start("SAD-add");
2378        if (audit_buf == NULL)
2379                return;
2380        xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2381        xfrm_audit_helper_sainfo(x, audit_buf);
2382        audit_log_format(audit_buf, " res=%u", result);
2383        audit_log_end(audit_buf);
2384}
2385EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2386
2387void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
2388{
2389        struct audit_buffer *audit_buf;
2390
2391        audit_buf = xfrm_audit_start("SAD-delete");
2392        if (audit_buf == NULL)
2393                return;
2394        xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2395        xfrm_audit_helper_sainfo(x, audit_buf);
2396        audit_log_format(audit_buf, " res=%u", result);
2397        audit_log_end(audit_buf);
2398}
2399EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2400
2401void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2402                                      struct sk_buff *skb)
2403{
2404        struct audit_buffer *audit_buf;
2405        u32 spi;
2406
2407        audit_buf = xfrm_audit_start("SA-replay-overflow");
2408        if (audit_buf == NULL)
2409                return;
2410        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2411        /* don't record the sequence number because it's inherent in this kind
2412         * of audit message */
2413        spi = ntohl(x->id.spi);
2414        audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2415        audit_log_end(audit_buf);
2416}
2417EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2418
2419void xfrm_audit_state_replay(struct xfrm_state *x,
2420                             struct sk_buff *skb, __be32 net_seq)
2421{
2422        struct audit_buffer *audit_buf;
2423        u32 spi;
2424
2425        audit_buf = xfrm_audit_start("SA-replayed-pkt");
2426        if (audit_buf == NULL)
2427                return;
2428        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2429        spi = ntohl(x->id.spi);
2430        audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2431                         spi, spi, ntohl(net_seq));
2432        audit_log_end(audit_buf);
2433}
2434EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
2435
2436void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2437{
2438        struct audit_buffer *audit_buf;
2439
2440        audit_buf = xfrm_audit_start("SA-notfound");
2441        if (audit_buf == NULL)
2442                return;
2443        xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2444        audit_log_end(audit_buf);
2445}
2446EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2447
2448void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2449                               __be32 net_spi, __be32 net_seq)
2450{
2451        struct audit_buffer *audit_buf;
2452        u32 spi;
2453
2454        audit_buf = xfrm_audit_start("SA-notfound");
2455        if (audit_buf == NULL)
2456                return;
2457        xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2458        spi = ntohl(net_spi);
2459        audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2460                         spi, spi, ntohl(net_seq));
2461        audit_log_end(audit_buf);
2462}
2463EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2464
2465void xfrm_audit_state_icvfail(struct xfrm_state *x,
2466                              struct sk_buff *skb, u8 proto)
2467{
2468        struct audit_buffer *audit_buf;
2469        __be32 net_spi;
2470        __be32 net_seq;
2471
2472        audit_buf = xfrm_audit_start("SA-icv-failure");
2473        if (audit_buf == NULL)
2474                return;
2475        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2476        if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2477                u32 spi = ntohl(net_spi);
2478                audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2479                                 spi, spi, ntohl(net_seq));
2480        }
2481        audit_log_end(audit_buf);
2482}
2483EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2484#endif /* CONFIG_AUDITSYSCALL */
2485