linux/net/xfrm/xfrm_state.c
   1/*
   2 * xfrm_state.c
   3 *
   4 * Changes:
   5 *      Mitsuru KANDA @USAGI
   6 *      Kazunori MIYAZAWA @USAGI
   7 *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
   8 *              IPv6 support
   9 *      YOSHIFUJI Hideaki @USAGI
  10 *              Split up af-specific functions
  11 *      Derek Atkins <derek@ihtfp.com>
  12 *              Add UDP Encapsulation
  13 *
  14 */
  15
  16#include <linux/workqueue.h>
  17#include <net/xfrm.h>
  18#include <linux/pfkeyv2.h>
  19#include <linux/ipsec.h>
  20#include <linux/module.h>
  21#include <linux/cache.h>
  22#include <linux/audit.h>
  23#include <linux/uaccess.h>
  24#include <linux/ktime.h>
  25#include <linux/slab.h>
  26#include <linux/interrupt.h>
  27#include <linux/kernel.h>
  28
  29#include "xfrm_hash.h"
  30
  31#define xfrm_state_deref_prot(table, net) \
  32        rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
  33
  34static void xfrm_state_gc_task(struct work_struct *work);
  35
  36/* Each xfrm_state may be linked to three hash tables:
  37   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
  38   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
  39      destination/tunnel endpoint. (output)
  40   3. Hash table by (daddr,saddr,family) to find SAs by address pair. (ctl)
  41 */
  42
  43static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
  44static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
  45static struct kmem_cache *xfrm_state_cache __ro_after_init;
  46
  47static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
  48static HLIST_HEAD(xfrm_state_gc_list);
  49
  50static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
  51{
  52        return refcount_inc_not_zero(&x->refcnt);
  53}
  54
  55static inline unsigned int xfrm_dst_hash(struct net *net,
  56                                         const xfrm_address_t *daddr,
  57                                         const xfrm_address_t *saddr,
  58                                         u32 reqid,
  59                                         unsigned short family)
  60{
  61        return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
  62}
  63
  64static inline unsigned int xfrm_src_hash(struct net *net,
  65                                         const xfrm_address_t *daddr,
  66                                         const xfrm_address_t *saddr,
  67                                         unsigned short family)
  68{
  69        return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
  70}
  71
  72static inline unsigned int
  73xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
  74              __be32 spi, u8 proto, unsigned short family)
  75{
  76        return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
  77}
  78
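/* Rehash every state on the old bydst chain @list into the new
 * destination, source and SPI tables sized by @nhashmask.
 */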
  79static void xfrm_hash_transfer(struct hlist_head *list,
  80                               struct hlist_head *ndsttable,
  81                               struct hlist_head *nsrctable,
  82                               struct hlist_head *nspitable,
  83                               unsigned int nhashmask)
  84{
  85        struct hlist_node *tmp;
  86        struct xfrm_state *x;
  87
  88        hlist_for_each_entry_safe(x, tmp, list, bydst) {
  89                unsigned int h;
  90
  91                h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
  92                                    x->props.reqid, x->props.family,
  93                                    nhashmask);
  94                hlist_add_head_rcu(&x->bydst, ndsttable + h);
  95
  96                h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
  97                                    x->props.family,
  98                                    nhashmask);
  99                hlist_add_head_rcu(&x->bysrc, nsrctable + h);
 100
 101                if (x->id.spi) {
 102                        h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
 103                                            x->id.proto, x->props.family,
 104                                            nhashmask);
 105                        hlist_add_head_rcu(&x->byspi, nspitable + h);
 106                }
 107        }
 108}
 109
 110static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
 111{
 112        return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
 113}
 114
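/* Worker for net->xfrm.state_hash_work: allocate hash tables twice the
 * current size, migrate all states under xfrm_state_lock while bumping
 * xfrm_state_hash_generation, and free the old tables once an RCU
 * grace period has passed.
 */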
 115static void xfrm_hash_resize(struct work_struct *work)
 116{
 117        struct net *net = container_of(work, struct net, xfrm.state_hash_work);
 118        struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
 119        unsigned long nsize, osize;
 120        unsigned int nhashmask, ohashmask;
 121        int i;
 122
 123        nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
 124        ndst = xfrm_hash_alloc(nsize);
 125        if (!ndst)
 126                return;
 127        nsrc = xfrm_hash_alloc(nsize);
 128        if (!nsrc) {
 129                xfrm_hash_free(ndst, nsize);
 130                return;
 131        }
 132        nspi = xfrm_hash_alloc(nsize);
 133        if (!nspi) {
 134                xfrm_hash_free(ndst, nsize);
 135                xfrm_hash_free(nsrc, nsize);
 136                return;
 137        }
 138
 139        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 140        write_seqcount_begin(&xfrm_state_hash_generation);
 141
 142        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
 143        odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
 144        for (i = net->xfrm.state_hmask; i >= 0; i--)
 145                xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
 146
 147        osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
 148        ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
 149        ohashmask = net->xfrm.state_hmask;
 150
 151        rcu_assign_pointer(net->xfrm.state_bydst, ndst);
 152        rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
 153        rcu_assign_pointer(net->xfrm.state_byspi, nspi);
 154        net->xfrm.state_hmask = nhashmask;
 155
 156        write_seqcount_end(&xfrm_state_hash_generation);
 157        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 158
 159        osize = (ohashmask + 1) * sizeof(struct hlist_head);
 160
 161        synchronize_rcu();
 162
 163        xfrm_hash_free(odst, osize);
 164        xfrm_hash_free(osrc, osize);
 165        xfrm_hash_free(ospi, osize);
 166}
 167
 168static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
 169static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
 170
 171static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 172
 173int __xfrm_state_delete(struct xfrm_state *x);
 174
 175int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
 176bool km_is_alive(const struct km_event *c);
 177void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 178
 179static DEFINE_SPINLOCK(xfrm_type_lock);
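/* Register @type for its IPsec protocol in the per-family type map;
 * returns -EEXIST if the slot is already taken.  xfrm_state_get_afinfo()
 * returns with the RCU read lock held, hence the rcu_read_unlock() on
 * the way out.
 */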
 180int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
 181{
 182        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 183        const struct xfrm_type **typemap;
 184        int err = 0;
 185
 186        if (unlikely(afinfo == NULL))
 187                return -EAFNOSUPPORT;
 188        typemap = afinfo->type_map;
 189        spin_lock_bh(&xfrm_type_lock);
 190
 191        if (likely(typemap[type->proto] == NULL))
 192                typemap[type->proto] = type;
 193        else
 194                err = -EEXIST;
 195        spin_unlock_bh(&xfrm_type_lock);
 196        rcu_read_unlock();
 197        return err;
 198}
 199EXPORT_SYMBOL(xfrm_register_type);
 200
 201int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
 202{
 203        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 204        const struct xfrm_type **typemap;
 205        int err = 0;
 206
 207        if (unlikely(afinfo == NULL))
 208                return -EAFNOSUPPORT;
 209        typemap = afinfo->type_map;
 210        spin_lock_bh(&xfrm_type_lock);
 211
 212        if (unlikely(typemap[type->proto] != type))
 213                err = -ENOENT;
 214        else
 215                typemap[type->proto] = NULL;
 216        spin_unlock_bh(&xfrm_type_lock);
 217        rcu_read_unlock();
 218        return err;
 219}
 220EXPORT_SYMBOL(xfrm_unregister_type);
 221
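/* Look up the xfrm_type registered for @proto/@family and take a module
 * reference on it.  On a miss, try loading "xfrm-type-<family>-<proto>"
 * once and retry.
 */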
 222static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
 223{
 224        struct xfrm_state_afinfo *afinfo;
 225        const struct xfrm_type **typemap;
 226        const struct xfrm_type *type;
 227        int modload_attempted = 0;
 228
 229retry:
 230        afinfo = xfrm_state_get_afinfo(family);
 231        if (unlikely(afinfo == NULL))
 232                return NULL;
 233        typemap = afinfo->type_map;
 234
 235        type = READ_ONCE(typemap[proto]);
 236        if (unlikely(type && !try_module_get(type->owner)))
 237                type = NULL;
 238
 239        rcu_read_unlock();
 240
 241        if (!type && !modload_attempted) {
 242                request_module("xfrm-type-%d-%d", family, proto);
 243                modload_attempted = 1;
 244                goto retry;
 245        }
 246
 247        return type;
 248}
 249
 250static void xfrm_put_type(const struct xfrm_type *type)
 251{
 252        module_put(type->owner);
 253}
 254
 255static DEFINE_SPINLOCK(xfrm_type_offload_lock);
 256int xfrm_register_type_offload(const struct xfrm_type_offload *type,
 257                               unsigned short family)
 258{
 259        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 260        const struct xfrm_type_offload **typemap;
 261        int err = 0;
 262
 263        if (unlikely(afinfo == NULL))
 264                return -EAFNOSUPPORT;
 265        typemap = afinfo->type_offload_map;
 266        spin_lock_bh(&xfrm_type_offload_lock);
 267
 268        if (likely(typemap[type->proto] == NULL))
 269                typemap[type->proto] = type;
 270        else
 271                err = -EEXIST;
 272        spin_unlock_bh(&xfrm_type_offload_lock);
 273        rcu_read_unlock();
 274        return err;
 275}
 276EXPORT_SYMBOL(xfrm_register_type_offload);
 277
 278int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
 279                                 unsigned short family)
 280{
 281        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 282        const struct xfrm_type_offload **typemap;
 283        int err = 0;
 284
 285        if (unlikely(afinfo == NULL))
 286                return -EAFNOSUPPORT;
 287        typemap = afinfo->type_offload_map;
 288        spin_lock_bh(&xfrm_type_offload_lock);
 289
 290        if (unlikely(typemap[type->proto] != type))
 291                err = -ENOENT;
 292        else
 293                typemap[type->proto] = NULL;
 294        spin_unlock_bh(&xfrm_type_offload_lock);
 295        rcu_read_unlock();
 296        return err;
 297}
 298EXPORT_SYMBOL(xfrm_unregister_type_offload);
 299
 300static const struct xfrm_type_offload *
 301xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
 302{
 303        struct xfrm_state_afinfo *afinfo;
 304        const struct xfrm_type_offload **typemap;
 305        const struct xfrm_type_offload *type;
 306
 307retry:
 308        afinfo = xfrm_state_get_afinfo(family);
 309        if (unlikely(afinfo == NULL))
 310                return NULL;
 311        typemap = afinfo->type_offload_map;
 312
 313        type = typemap[proto];
 314        if ((type && !try_module_get(type->owner)))
 315                type = NULL;
 316
 317        rcu_read_unlock();
 318
 319        if (!type && try_load) {
 320                request_module("xfrm-offload-%d-%d", family, proto);
 321                try_load = false;
 322                goto retry;
 323        }
 324
 325        return type;
 326}
 327
 328static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
 329{
 330        module_put(type->owner);
 331}
 332
 333static DEFINE_SPINLOCK(xfrm_mode_lock);
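/* Register @mode in the per-family mode map, pinning the owning afinfo
 * module; returns -EEXIST if the encapsulation slot is already in use.
 */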
 334int xfrm_register_mode(struct xfrm_mode *mode, int family)
 335{
 336        struct xfrm_state_afinfo *afinfo;
 337        struct xfrm_mode **modemap;
 338        int err;
 339
 340        if (unlikely(mode->encap >= XFRM_MODE_MAX))
 341                return -EINVAL;
 342
 343        afinfo = xfrm_state_get_afinfo(family);
 344        if (unlikely(afinfo == NULL))
 345                return -EAFNOSUPPORT;
 346
 347        err = -EEXIST;
 348        modemap = afinfo->mode_map;
 349        spin_lock_bh(&xfrm_mode_lock);
 350        if (modemap[mode->encap])
 351                goto out;
 352
 353        err = -ENOENT;
 354        if (!try_module_get(afinfo->owner))
 355                goto out;
 356
 357        mode->afinfo = afinfo;
 358        modemap[mode->encap] = mode;
 359        err = 0;
 360
 361out:
 362        spin_unlock_bh(&xfrm_mode_lock);
 363        rcu_read_unlock();
 364        return err;
 365}
 366EXPORT_SYMBOL(xfrm_register_mode);
 367
 368int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
 369{
 370        struct xfrm_state_afinfo *afinfo;
 371        struct xfrm_mode **modemap;
 372        int err;
 373
 374        if (unlikely(mode->encap >= XFRM_MODE_MAX))
 375                return -EINVAL;
 376
 377        afinfo = xfrm_state_get_afinfo(family);
 378        if (unlikely(afinfo == NULL))
 379                return -EAFNOSUPPORT;
 380
 381        err = -ENOENT;
 382        modemap = afinfo->mode_map;
 383        spin_lock_bh(&xfrm_mode_lock);
 384        if (likely(modemap[mode->encap] == mode)) {
 385                modemap[mode->encap] = NULL;
 386                module_put(mode->afinfo->owner);
 387                err = 0;
 388        }
 389
 390        spin_unlock_bh(&xfrm_mode_lock);
 391        rcu_read_unlock();
 392        return err;
 393}
 394EXPORT_SYMBOL(xfrm_unregister_mode);
 395
 396static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 397{
 398        struct xfrm_state_afinfo *afinfo;
 399        struct xfrm_mode *mode;
 400        int modload_attempted = 0;
 401
 402        if (unlikely(encap >= XFRM_MODE_MAX))
 403                return NULL;
 404
 405retry:
 406        afinfo = xfrm_state_get_afinfo(family);
 407        if (unlikely(afinfo == NULL))
 408                return NULL;
 409
 410        mode = READ_ONCE(afinfo->mode_map[encap]);
 411        if (unlikely(mode && !try_module_get(mode->owner)))
 412                mode = NULL;
 413
 414        rcu_read_unlock();
 415        if (!mode && !modload_attempted) {
 416                request_module("xfrm-mode-%d-%d", family, encap);
 417                modload_attempted = 1;
 418                goto retry;
 419        }
 420
 421        return mode;
 422}
 423
 424static void xfrm_put_mode(struct xfrm_mode *mode)
 425{
 426        module_put(mode->owner);
 427}
 428
 429void xfrm_state_free(struct xfrm_state *x)
 430{
 431        kmem_cache_free(xfrm_state_cache, x);
 432}
 433EXPORT_SYMBOL(xfrm_state_free);
 434
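/* Final teardown of a state: stop its timers, free the attached
 * algorithms and optional sub-objects, drop mode/type module references,
 * release offload and security state, then free the object itself.
 */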
 435static void ___xfrm_state_destroy(struct xfrm_state *x)
 436{
 437        tasklet_hrtimer_cancel(&x->mtimer);
 438        del_timer_sync(&x->rtimer);
 439        kfree(x->aead);
 440        kfree(x->aalg);
 441        kfree(x->ealg);
 442        kfree(x->calg);
 443        kfree(x->encap);
 444        kfree(x->coaddr);
 445        kfree(x->replay_esn);
 446        kfree(x->preplay_esn);
 447        if (x->inner_mode)
 448                xfrm_put_mode(x->inner_mode);
 449        if (x->inner_mode_iaf)
 450                xfrm_put_mode(x->inner_mode_iaf);
 451        if (x->outer_mode)
 452                xfrm_put_mode(x->outer_mode);
 453        if (x->type_offload)
 454                xfrm_put_type_offload(x->type_offload);
 455        if (x->type) {
 456                x->type->destructor(x);
 457                xfrm_put_type(x->type);
 458        }
 459        xfrm_dev_state_free(x);
 460        security_xfrm_state_free(x);
 461        xfrm_state_free(x);
 462}
 463
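/* Garbage-collection worker: detach the pending list under
 * xfrm_state_gc_lock, wait for an RCU grace period so lockless readers
 * are done, then destroy each queued state.
 */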
 464static void xfrm_state_gc_task(struct work_struct *work)
 465{
 466        struct xfrm_state *x;
 467        struct hlist_node *tmp;
 468        struct hlist_head gc_list;
 469
 470        spin_lock_bh(&xfrm_state_gc_lock);
 471        hlist_move_list(&xfrm_state_gc_list, &gc_list);
 472        spin_unlock_bh(&xfrm_state_gc_lock);
 473
 474        synchronize_rcu();
 475
 476        hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 477                ___xfrm_state_destroy(x);
 478}
 479
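/* Per-state lifetime timer.  Checks hard and soft add/use expiry times,
 * notifies key managers of soft expiry via km_state_expired(), deletes
 * the state on hard expiry, and otherwise re-arms itself for the next
 * deadline.
 */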
 480static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 481{
 482        struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
 483        struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
 484        time64_t now = ktime_get_real_seconds();
 485        time64_t next = TIME64_MAX;
 486        int warn = 0;
 487        int err = 0;
 488
 489        spin_lock(&x->lock);
 490        if (x->km.state == XFRM_STATE_DEAD)
 491                goto out;
 492        if (x->km.state == XFRM_STATE_EXPIRED)
 493                goto expired;
 494        if (x->lft.hard_add_expires_seconds) {
 495                long tmo = x->lft.hard_add_expires_seconds +
 496                        x->curlft.add_time - now;
 497                if (tmo <= 0) {
 498                        if (x->xflags & XFRM_SOFT_EXPIRE) {
 499                                /* enter hard expire without soft expire first?!
 500                                 * setting a new date could trigger this.
  501                                 * workaround: fix x->curlft.add_time as follows:
 502                                 */
 503                                x->curlft.add_time = now - x->saved_tmo - 1;
 504                                tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
 505                        } else
 506                                goto expired;
 507                }
 508                if (tmo < next)
 509                        next = tmo;
 510        }
 511        if (x->lft.hard_use_expires_seconds) {
 512                long tmo = x->lft.hard_use_expires_seconds +
 513                        (x->curlft.use_time ? : now) - now;
 514                if (tmo <= 0)
 515                        goto expired;
 516                if (tmo < next)
 517                        next = tmo;
 518        }
 519        if (x->km.dying)
 520                goto resched;
 521        if (x->lft.soft_add_expires_seconds) {
 522                long tmo = x->lft.soft_add_expires_seconds +
 523                        x->curlft.add_time - now;
 524                if (tmo <= 0) {
 525                        warn = 1;
 526                        x->xflags &= ~XFRM_SOFT_EXPIRE;
 527                } else if (tmo < next) {
 528                        next = tmo;
 529                        x->xflags |= XFRM_SOFT_EXPIRE;
 530                        x->saved_tmo = tmo;
 531                }
 532        }
 533        if (x->lft.soft_use_expires_seconds) {
 534                long tmo = x->lft.soft_use_expires_seconds +
 535                        (x->curlft.use_time ? : now) - now;
 536                if (tmo <= 0)
 537                        warn = 1;
 538                else if (tmo < next)
 539                        next = tmo;
 540        }
 541
 542        x->km.dying = warn;
 543        if (warn)
 544                km_state_expired(x, 0, 0);
 545resched:
 546        if (next != TIME64_MAX) {
 547                tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
 548        }
 549
 550        goto out;
 551
 552expired:
 553        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
 554                x->km.state = XFRM_STATE_EXPIRED;
 555
 556        err = __xfrm_state_delete(x);
 557        if (!err)
 558                km_state_expired(x, 1, 0);
 559
 560        xfrm_audit_state_delete(x, err ? 0 : 1, true);
 561
 562out:
 563        spin_unlock(&x->lock);
 564        return HRTIMER_NORESTART;
 565}
 566
 567static void xfrm_replay_timer_handler(struct timer_list *t);
 568
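/* Allocate a zeroed xfrm_state for @net with one reference held, list
 * and hash nodes initialised, timers set up and byte/packet lifetimes
 * defaulting to XFRM_INF.
 */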
 569struct xfrm_state *xfrm_state_alloc(struct net *net)
 570{
 571        struct xfrm_state *x;
 572
 573        x = kmem_cache_alloc(xfrm_state_cache, GFP_ATOMIC | __GFP_ZERO);
 574
 575        if (x) {
 576                write_pnet(&x->xs_net, net);
 577                refcount_set(&x->refcnt, 1);
 578                atomic_set(&x->tunnel_users, 0);
 579                INIT_LIST_HEAD(&x->km.all);
 580                INIT_HLIST_NODE(&x->bydst);
 581                INIT_HLIST_NODE(&x->bysrc);
 582                INIT_HLIST_NODE(&x->byspi);
 583                tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
 584                                        CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
 585                timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
 586                x->curlft.add_time = ktime_get_real_seconds();
 587                x->lft.soft_byte_limit = XFRM_INF;
 588                x->lft.soft_packet_limit = XFRM_INF;
 589                x->lft.hard_byte_limit = XFRM_INF;
 590                x->lft.hard_packet_limit = XFRM_INF;
 591                x->replay_maxage = 0;
 592                x->replay_maxdiff = 0;
 593                x->inner_mode = NULL;
 594                x->inner_mode_iaf = NULL;
 595                spin_lock_init(&x->lock);
 596        }
 597        return x;
 598}
 599EXPORT_SYMBOL(xfrm_state_alloc);
 600
 601void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
 602{
 603        WARN_ON(x->km.state != XFRM_STATE_DEAD);
 604
 605        if (sync) {
 606                synchronize_rcu();
 607                ___xfrm_state_destroy(x);
 608        } else {
 609                spin_lock_bh(&xfrm_state_gc_lock);
 610                hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 611                spin_unlock_bh(&xfrm_state_gc_lock);
 612                schedule_work(&xfrm_state_gc_work);
 613        }
 614}
 615EXPORT_SYMBOL(__xfrm_state_destroy);
 616
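/* Mark @x dead and unlink it from km.all and all hash chains, dropping
 * the reference that xfrm_state_alloc() gave.  Returns -ESRCH if the
 * state was already dead.  Called with x->lock held.
 */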
 617int __xfrm_state_delete(struct xfrm_state *x)
 618{
 619        struct net *net = xs_net(x);
 620        int err = -ESRCH;
 621
 622        if (x->km.state != XFRM_STATE_DEAD) {
 623                x->km.state = XFRM_STATE_DEAD;
 624                spin_lock(&net->xfrm.xfrm_state_lock);
 625                list_del(&x->km.all);
 626                hlist_del_rcu(&x->bydst);
 627                hlist_del_rcu(&x->bysrc);
 628                if (x->id.spi)
 629                        hlist_del_rcu(&x->byspi);
 630                net->xfrm.state_num--;
 631                spin_unlock(&net->xfrm.xfrm_state_lock);
 632
 633                xfrm_dev_state_delete(x);
 634
 635                /* All xfrm_state objects are created by xfrm_state_alloc.
 636                 * The xfrm_state_alloc call gives a reference, and that
 637                 * is what we are dropping here.
 638                 */
 639                xfrm_state_put(x);
 640                err = 0;
 641        }
 642
 643        return err;
 644}
 645EXPORT_SYMBOL(__xfrm_state_delete);
 646
 647int xfrm_state_delete(struct xfrm_state *x)
 648{
 649        int err;
 650
 651        spin_lock_bh(&x->lock);
 652        err = __xfrm_state_delete(x);
 653        spin_unlock_bh(&x->lock);
 654
 655        return err;
 656}
 657EXPORT_SYMBOL(xfrm_state_delete);
 658
 659#ifdef CONFIG_SECURITY_NETWORK_XFRM
 660static inline int
 661xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 662{
 663        int i, err = 0;
 664
 665        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 666                struct xfrm_state *x;
 667
 668                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 669                        if (xfrm_id_proto_match(x->id.proto, proto) &&
 670                           (err = security_xfrm_state_delete(x)) != 0) {
 671                                xfrm_audit_state_delete(x, 0, task_valid);
 672                                return err;
 673                        }
 674                }
 675        }
 676
 677        return err;
 678}
 679
 680static inline int
 681xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
 682{
 683        int i, err = 0;
 684
 685        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 686                struct xfrm_state *x;
 687                struct xfrm_state_offload *xso;
 688
 689                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 690                        xso = &x->xso;
 691
 692                        if (xso->dev == dev &&
 693                           (err = security_xfrm_state_delete(x)) != 0) {
 694                                xfrm_audit_state_delete(x, 0, task_valid);
 695                                return err;
 696                        }
 697                }
 698        }
 699
 700        return err;
 701}
 702#else
 703static inline int
 704xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 705{
 706        return 0;
 707}
 708
 709static inline int
 710xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
 711{
 712        return 0;
 713}
 714#endif
 715
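/* Delete every state in @net whose protocol matches @proto and that is
 * not kernel-owned, subject to the security flush check.  The hash lock
 * is dropped and re-taken around each deletion; returns 0 if at least
 * one state was removed, -ESRCH otherwise.
 */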
 716int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
 717{
 718        int i, err = 0, cnt = 0;
 719
 720        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 721        err = xfrm_state_flush_secctx_check(net, proto, task_valid);
 722        if (err)
 723                goto out;
 724
 725        err = -ESRCH;
 726        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 727                struct xfrm_state *x;
 728restart:
 729                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 730                        if (!xfrm_state_kern(x) &&
 731                            xfrm_id_proto_match(x->id.proto, proto)) {
 732                                xfrm_state_hold(x);
 733                                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 734
 735                                err = xfrm_state_delete(x);
 736                                xfrm_audit_state_delete(x, err ? 0 : 1,
 737                                                        task_valid);
 738                                if (sync)
 739                                        xfrm_state_put_sync(x);
 740                                else
 741                                        xfrm_state_put(x);
 742                                if (!err)
 743                                        cnt++;
 744
 745                                spin_lock_bh(&net->xfrm.xfrm_state_lock);
 746                                goto restart;
 747                        }
 748                }
 749        }
 750out:
 751        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 752        if (cnt)
 753                err = 0;
 754
 755        return err;
 756}
 757EXPORT_SYMBOL(xfrm_state_flush);
 758
 759int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
 760{
 761        int i, err = 0, cnt = 0;
 762
 763        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 764        err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
 765        if (err)
 766                goto out;
 767
 768        err = -ESRCH;
 769        for (i = 0; i <= net->xfrm.state_hmask; i++) {
 770                struct xfrm_state *x;
 771                struct xfrm_state_offload *xso;
 772restart:
 773                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 774                        xso = &x->xso;
 775
 776                        if (!xfrm_state_kern(x) && xso->dev == dev) {
 777                                xfrm_state_hold(x);
 778                                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 779
 780                                err = xfrm_state_delete(x);
 781                                xfrm_audit_state_delete(x, err ? 0 : 1,
 782                                                        task_valid);
 783                                xfrm_state_put(x);
 784                                if (!err)
 785                                        cnt++;
 786
 787                                spin_lock_bh(&net->xfrm.xfrm_state_lock);
 788                                goto restart;
 789                        }
 790                }
 791        }
 792        if (cnt)
 793                err = 0;
 794
 795out:
 796        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 797        return err;
 798}
 799EXPORT_SYMBOL(xfrm_dev_state_flush);
 800
 801void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 802{
 803        spin_lock_bh(&net->xfrm.xfrm_state_lock);
 804        si->sadcnt = net->xfrm.state_num;
 805        si->sadhcnt = net->xfrm.state_hmask + 1;
 806        si->sadhmcnt = xfrm_state_hashmax;
 807        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 808}
 809EXPORT_SYMBOL(xfrm_sad_getinfo);
 810
 811static void
 812xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 813                    const struct xfrm_tmpl *tmpl,
 814                    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 815                    unsigned short family)
 816{
 817        struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
 818
 819        if (!afinfo)
 820                return;
 821
 822        afinfo->init_tempsel(&x->sel, fl);
 823
 824        if (family != tmpl->encap_family) {
 825                afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
 826                if (!afinfo)
 827                        return;
 828        }
 829        afinfo->init_temprop(x, tmpl, daddr, saddr);
 830}
 831
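/* Lockless (RCU) lookup in the by-SPI hash: match on family, SPI,
 * protocol, destination address and mark, and return the state only if
 * a reference can still be taken.
 */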
 832static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 833                                              const xfrm_address_t *daddr,
 834                                              __be32 spi, u8 proto,
 835                                              unsigned short family)
 836{
 837        unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 838        struct xfrm_state *x;
 839
 840        hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
 841                if (x->props.family != family ||
 842                    x->id.spi       != spi ||
 843                    x->id.proto     != proto ||
 844                    !xfrm_addr_equal(&x->id.daddr, daddr, family))
 845                        continue;
 846
 847                if ((mark & x->mark.m) != x->mark.v)
 848                        continue;
 849                if (!xfrm_state_hold_rcu(x))
 850                        continue;
 851                return x;
 852        }
 853
 854        return NULL;
 855}
 856
 857static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 858                                                     const xfrm_address_t *daddr,
 859                                                     const xfrm_address_t *saddr,
 860                                                     u8 proto, unsigned short family)
 861{
 862        unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 863        struct xfrm_state *x;
 864
 865        hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
 866                if (x->props.family != family ||
 867                    x->id.proto     != proto ||
 868                    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
 869                    !xfrm_addr_equal(&x->props.saddr, saddr, family))
 870                        continue;
 871
 872                if ((mark & x->mark.m) != x->mark.v)
 873                        continue;
 874                if (!xfrm_state_hold_rcu(x))
 875                        continue;
 876                return x;
 877        }
 878
 879        return NULL;
 880}
 881
 882static inline struct xfrm_state *
 883__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
 884{
 885        struct net *net = xs_net(x);
 886        u32 mark = x->mark.v & x->mark.m;
 887
 888        if (use_spi)
 889                return __xfrm_state_lookup(net, mark, &x->id.daddr,
 890                                           x->id.spi, x->id.proto, family);
 891        else
 892                return __xfrm_state_lookup_byaddr(net, mark,
 893                                                  &x->id.daddr,
 894                                                  &x->props.saddr,
 895                                                  x->id.proto, family);
 896}
 897
 898static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
 899{
 900        if (have_hash_collision &&
 901            (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
 902            net->xfrm.state_num > net->xfrm.state_hmask)
 903                schedule_work(&net->xfrm.state_hash_work);
 904}
 905
 906static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 907                               const struct flowi *fl, unsigned short family,
 908                               struct xfrm_state **best, int *acq_in_progress,
 909                               int *error)
 910{
 911        /* Resolution logic:
 912         * 1. There is a valid state with matching selector. Done.
 913         * 2. Valid state with inappropriate selector. Skip.
 914         *
 915         * Entering area of "sysdeps".
 916         *
 917         * 3. If state is not valid, selector is temporary, it selects
 918         *    only session which triggered previous resolution. Key
 919         *    manager will do something to install a state with proper
 920         *    selector.
 921         */
 922        if (x->km.state == XFRM_STATE_VALID) {
 923                if ((x->sel.family &&
 924                     !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
 925                    !security_xfrm_state_pol_flow_match(x, pol, fl))
 926                        return;
 927
 928                if (!*best ||
 929                    (*best)->km.dying > x->km.dying ||
 930                    ((*best)->km.dying == x->km.dying &&
 931                     (*best)->curlft.add_time < x->curlft.add_time))
 932                        *best = x;
 933        } else if (x->km.state == XFRM_STATE_ACQ) {
 934                *acq_in_progress = 1;
 935        } else if (x->km.state == XFRM_STATE_ERROR ||
 936                   x->km.state == XFRM_STATE_EXPIRED) {
 937                if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
 938                    security_xfrm_state_pol_flow_match(x, pol, fl))
 939                        *error = -ESRCH;
 940        }
 941}
 942
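/* Output-path SA resolution.  Scan the bydst hash (and then a
 * wildcard-source pass) for the best VALID state matching the template
 * and policy.  If nothing is found and no acquire is in flight, create
 * a larval XFRM_STATE_ACQ entry and ask the key managers to negotiate
 * an SA via km_query().  The result is discarded with -EAGAIN if the
 * hash tables were resized during the walk (seqcount check at the end).
 */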
 943struct xfrm_state *
 944xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 945                const struct flowi *fl, struct xfrm_tmpl *tmpl,
 946                struct xfrm_policy *pol, int *err,
 947                unsigned short family, u32 if_id)
 948{
 949        static xfrm_address_t saddr_wildcard = { };
 950        struct net *net = xp_net(pol);
 951        unsigned int h, h_wildcard;
 952        struct xfrm_state *x, *x0, *to_put;
 953        int acquire_in_progress = 0;
 954        int error = 0;
 955        struct xfrm_state *best = NULL;
 956        u32 mark = pol->mark.v & pol->mark.m;
 957        unsigned short encap_family = tmpl->encap_family;
 958        unsigned int sequence;
 959        struct km_event c;
 960
 961        to_put = NULL;
 962
 963        sequence = read_seqcount_begin(&xfrm_state_hash_generation);
 964
 965        rcu_read_lock();
 966        h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 967        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 968                if (x->props.family == encap_family &&
 969                    x->props.reqid == tmpl->reqid &&
 970                    (mark & x->mark.m) == x->mark.v &&
 971                    x->if_id == if_id &&
 972                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 973                    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 974                    tmpl->mode == x->props.mode &&
 975                    tmpl->id.proto == x->id.proto &&
 976                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
 977                        xfrm_state_look_at(pol, x, fl, encap_family,
 978                                           &best, &acquire_in_progress, &error);
 979        }
 980        if (best || acquire_in_progress)
 981                goto found;
 982
 983        h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
 984        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
 985                if (x->props.family == encap_family &&
 986                    x->props.reqid == tmpl->reqid &&
 987                    (mark & x->mark.m) == x->mark.v &&
 988                    x->if_id == if_id &&
 989                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 990                    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
 991                    tmpl->mode == x->props.mode &&
 992                    tmpl->id.proto == x->id.proto &&
 993                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
 994                        xfrm_state_look_at(pol, x, fl, encap_family,
 995                                           &best, &acquire_in_progress, &error);
 996        }
 997
 998found:
 999        x = best;
1000        if (!x && !error && !acquire_in_progress) {
1001                if (tmpl->id.spi &&
1002                    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
1003                                              tmpl->id.proto, encap_family)) != NULL) {
1004                        to_put = x0;
1005                        error = -EEXIST;
1006                        goto out;
1007                }
1008
1009                c.net = net;
1010                /* If the KMs have no listeners (yet...), avoid allocating an SA
1011                 * for each and every packet - garbage collection might not
1012                 * handle the flood.
1013                 */
1014                if (!km_is_alive(&c)) {
1015                        error = -ESRCH;
1016                        goto out;
1017                }
1018
1019                x = xfrm_state_alloc(net);
1020                if (x == NULL) {
1021                        error = -ENOMEM;
1022                        goto out;
1023                }
1024                /* Initialize temporary state matching only
1025                 * to current session. */
1026                xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
1027                memcpy(&x->mark, &pol->mark, sizeof(x->mark));
1028                x->if_id = if_id;
1029
1030                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
1031                if (error) {
1032                        x->km.state = XFRM_STATE_DEAD;
1033                        to_put = x;
1034                        x = NULL;
1035                        goto out;
1036                }
1037
1038                if (km_query(x, tmpl, pol) == 0) {
1039                        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1040                        x->km.state = XFRM_STATE_ACQ;
1041                        list_add(&x->km.all, &net->xfrm.state_all);
1042                        hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1043                        h = xfrm_src_hash(net, daddr, saddr, encap_family);
1044                        hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1045                        if (x->id.spi) {
1046                                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
1047                                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1048                        }
1049                        x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1050                        tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1051                        net->xfrm.state_num++;
1052                        xfrm_hash_grow_check(net, x->bydst.next != NULL);
1053                        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1054                } else {
1055                        x->km.state = XFRM_STATE_DEAD;
1056                        to_put = x;
1057                        x = NULL;
1058                        error = -ESRCH;
1059                }
1060        }
1061out:
1062        if (x) {
1063                if (!xfrm_state_hold_rcu(x)) {
1064                        *err = -EAGAIN;
1065                        x = NULL;
1066                }
1067        } else {
1068                *err = acquire_in_progress ? -EAGAIN : error;
1069        }
1070        rcu_read_unlock();
1071        if (to_put)
1072                xfrm_state_put(to_put);
1073
1074        if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
1075                *err = -EAGAIN;
1076                if (x) {
1077                        xfrm_state_put(x);
1078                        x = NULL;
1079                }
1080        }
1081
1082        return x;
1083}
1084
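/* Lookup-only variant: find a VALID state matching mark, if_id,
 * addresses, mode, protocol and reqid and return it with a reference
 * held, without ever creating one.
 */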
1085struct xfrm_state *
1086xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
1087                    xfrm_address_t *daddr, xfrm_address_t *saddr,
1088                    unsigned short family, u8 mode, u8 proto, u32 reqid)
1089{
1090        unsigned int h;
1091        struct xfrm_state *rx = NULL, *x = NULL;
1092
1093        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1094        h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1095        hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1096                if (x->props.family == family &&
1097                    x->props.reqid == reqid &&
1098                    (mark & x->mark.m) == x->mark.v &&
1099                    x->if_id == if_id &&
1100                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
1101                    xfrm_state_addr_check(x, daddr, saddr, family) &&
1102                    mode == x->props.mode &&
1103                    proto == x->id.proto &&
1104                    x->km.state == XFRM_STATE_VALID) {
1105                        rx = x;
1106                        break;
1107                }
1108        }
1109
1110        if (rx)
1111                xfrm_state_hold(rx);
1112        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1113
1114
1115        return rx;
1116}
1117EXPORT_SYMBOL(xfrm_stateonly_find);
1118
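/* Walk the full state_all list: only the SPI and family are known here,
 * and the by-SPI hash is also keyed on the destination address.
 */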
1119struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
1120                                              unsigned short family)
1121{
1122        struct xfrm_state *x;
1123        struct xfrm_state_walk *w;
1124
1125        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1126        list_for_each_entry(w, &net->xfrm.state_all, all) {
1127                x = container_of(w, struct xfrm_state, km);
1128                if (x->props.family != family ||
1129                        x->id.spi != spi)
1130                        continue;
1131
1132                xfrm_state_hold(x);
1133                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1134                return x;
1135        }
1136        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1137        return NULL;
1138}
1139EXPORT_SYMBOL(xfrm_state_lookup_byspi);
1140
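/* Link @x into state_all and the bydst/bysrc hash chains (and byspi once
 * an SPI is assigned), arm its lifetime and replay timers and possibly
 * trigger a hash grow.  Caller holds net->xfrm.xfrm_state_lock.
 */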
1141static void __xfrm_state_insert(struct xfrm_state *x)
1142{
1143        struct net *net = xs_net(x);
1144        unsigned int h;
1145
1146        list_add(&x->km.all, &net->xfrm.state_all);
1147
1148        h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
1149                          x->props.reqid, x->props.family);
1150        hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1151
1152        h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
1153        hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1154
1155        if (x->id.spi) {
1156                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
1157                                  x->props.family);
1158
1159                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1160        }
1161
1162        tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1163        if (x->replay_maxage)
1164                mod_timer(&x->rtimer, jiffies + x->replay_maxage);
1165
1166        net->xfrm.state_num++;
1167
1168        xfrm_hash_grow_check(net, x->bydst.next != NULL);
1169}
1170
1171/* net->xfrm.xfrm_state_lock is held */
1172static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
1173{
1174        struct net *net = xs_net(xnew);
1175        unsigned short family = xnew->props.family;
1176        u32 reqid = xnew->props.reqid;
1177        struct xfrm_state *x;
1178        unsigned int h;
1179        u32 mark = xnew->mark.v & xnew->mark.m;
1180        u32 if_id = xnew->if_id;
1181
1182        h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
1183        hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1184                if (x->props.family     == family &&
1185                    x->props.reqid      == reqid &&
1186                    x->if_id            == if_id &&
1187                    (mark & x->mark.m) == x->mark.v &&
1188                    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
1189                    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
1190                        x->genid++;
1191        }
1192}
1193
1194void xfrm_state_insert(struct xfrm_state *x)
1195{
1196        struct net *net = xs_net(x);
1197
1198        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1199        __xfrm_state_bump_genids(x);
1200        __xfrm_state_insert(x);
1201        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1202}
1203EXPORT_SYMBOL(xfrm_state_insert);
1204
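/* Find a larval (XFRM_STATE_ACQ) state matching the given key, or, when
 * @create is set, allocate one with selector and addresses pre-filled
 * and the acquire-expiry timer armed.
 */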
1205/* net->xfrm.xfrm_state_lock is held */
1206static struct xfrm_state *__find_acq_core(struct net *net,
1207                                          const struct xfrm_mark *m,
1208                                          unsigned short family, u8 mode,
1209                                          u32 reqid, u32 if_id, u8 proto,
1210                                          const xfrm_address_t *daddr,
1211                                          const xfrm_address_t *saddr,
1212                                          int create)
1213{
1214        unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1215        struct xfrm_state *x;
1216        u32 mark = m->v & m->m;
1217
1218        hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1219                if (x->props.reqid  != reqid ||
1220                    x->props.mode   != mode ||
1221                    x->props.family != family ||
1222                    x->km.state     != XFRM_STATE_ACQ ||
1223                    x->id.spi       != 0 ||
1224                    x->id.proto     != proto ||
1225                    (mark & x->mark.m) != x->mark.v ||
1226                    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1227                    !xfrm_addr_equal(&x->props.saddr, saddr, family))
1228                        continue;
1229
1230                xfrm_state_hold(x);
1231                return x;
1232        }
1233
1234        if (!create)
1235                return NULL;
1236
1237        x = xfrm_state_alloc(net);
1238        if (likely(x)) {
1239                switch (family) {
1240                case AF_INET:
1241                        x->sel.daddr.a4 = daddr->a4;
1242                        x->sel.saddr.a4 = saddr->a4;
1243                        x->sel.prefixlen_d = 32;
1244                        x->sel.prefixlen_s = 32;
1245                        x->props.saddr.a4 = saddr->a4;
1246                        x->id.daddr.a4 = daddr->a4;
1247                        break;
1248
1249                case AF_INET6:
1250                        x->sel.daddr.in6 = daddr->in6;
1251                        x->sel.saddr.in6 = saddr->in6;
1252                        x->sel.prefixlen_d = 128;
1253                        x->sel.prefixlen_s = 128;
1254                        x->props.saddr.in6 = saddr->in6;
1255                        x->id.daddr.in6 = daddr->in6;
1256                        break;
1257                }
1258
1259                x->km.state = XFRM_STATE_ACQ;
1260                x->id.proto = proto;
1261                x->props.family = family;
1262                x->props.mode = mode;
1263                x->props.reqid = reqid;
1264                x->if_id = if_id;
1265                x->mark.v = m->v;
1266                x->mark.m = m->m;
1267                x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1268                xfrm_state_hold(x);
1269                tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1270                list_add(&x->km.all, &net->xfrm.state_all);
1271                hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1272                h = xfrm_src_hash(net, daddr, saddr, family);
1273                hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1274
1275                net->xfrm.state_num++;
1276
1277                xfrm_hash_grow_check(net, x->bydst.next != NULL);
1278        }
1279
1280        return x;
1281}
1282
1283static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1284
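/* Add a fully keyed SA: fails with -EEXIST if an equivalent state is
 * already installed, otherwise inserts @x and deletes any matching
 * larval ACQ entry left over from an earlier acquire.
 */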
1285int xfrm_state_add(struct xfrm_state *x)
1286{
1287        struct net *net = xs_net(x);
1288        struct xfrm_state *x1, *to_put;
1289        int family;
1290        int err;
1291        u32 mark = x->mark.v & x->mark.m;
1292        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1293
1294        family = x->props.family;
1295
1296        to_put = NULL;
1297
1298        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1299
1300        x1 = __xfrm_state_locate(x, use_spi, family);
1301        if (x1) {
1302                to_put = x1;
1303                x1 = NULL;
1304                err = -EEXIST;
1305                goto out;
1306        }
1307
1308        if (use_spi && x->km.seq) {
1309                x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
1310                if (x1 && ((x1->id.proto != x->id.proto) ||
1311                    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
1312                        to_put = x1;
1313                        x1 = NULL;
1314                }
1315        }
1316
1317        if (use_spi && !x1)
1318                x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
1319                                     x->props.reqid, x->if_id, x->id.proto,
1320                                     &x->id.daddr, &x->props.saddr, 0);
1321
1322        __xfrm_state_bump_genids(x);
1323        __xfrm_state_insert(x);
1324        err = 0;
1325
1326out:
1327        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1328
1329        if (x1) {
1330                xfrm_state_delete(x1);
1331                xfrm_state_put(x1);
1332        }
1333
1334        if (to_put)
1335                xfrm_state_put(to_put);
1336
1337        return err;
1338}
1339EXPORT_SYMBOL(xfrm_state_add);
1340
1341#ifdef CONFIG_XFRM_MIGRATE
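/* Duplicate @orig for migration: copy id, selector, lifetimes,
 * properties and algorithms, optionally replacing the encapsulation
 * template with @encap, and clone the replay state before
 * re-initialising the new state.
 */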
1342static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1343                                           struct xfrm_encap_tmpl *encap)
1344{
1345        struct net *net = xs_net(orig);
1346        struct xfrm_state *x = xfrm_state_alloc(net);
1347        if (!x)
1348                goto out;
1349
1350        memcpy(&x->id, &orig->id, sizeof(x->id));
1351        memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1352        memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1353        x->props.mode = orig->props.mode;
1354        x->props.replay_window = orig->props.replay_window;
1355        x->props.reqid = orig->props.reqid;
1356        x->props.family = orig->props.family;
1357        x->props.saddr = orig->props.saddr;
1358
1359        if (orig->aalg) {
1360                x->aalg = xfrm_algo_auth_clone(orig->aalg);
1361                if (!x->aalg)
1362                        goto error;
1363        }
1364        x->props.aalgo = orig->props.aalgo;
1365
1366        if (orig->aead) {
1367                x->aead = xfrm_algo_aead_clone(orig->aead);
1368                x->geniv = orig->geniv;
1369                if (!x->aead)
1370                        goto error;
1371        }
1372        if (orig->ealg) {
1373                x->ealg = xfrm_algo_clone(orig->ealg);
1374                if (!x->ealg)
1375                        goto error;
1376        }
1377        x->props.ealgo = orig->props.ealgo;
1378
1379        if (orig->calg) {
1380                x->calg = xfrm_algo_clone(orig->calg);
1381                if (!x->calg)
1382                        goto error;
1383        }
1384        x->props.calgo = orig->props.calgo;
1385
1386        if (encap || orig->encap) {
1387                if (encap)
1388                        x->encap = kmemdup(encap, sizeof(*x->encap),
1389                                        GFP_KERNEL);
1390                else
1391                        x->encap = kmemdup(orig->encap, sizeof(*x->encap),
1392                                        GFP_KERNEL);
1393
1394                if (!x->encap)
1395                        goto error;
1396        }
1397
1398        if (orig->coaddr) {
1399                x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1400                                    GFP_KERNEL);
1401                if (!x->coaddr)
1402                        goto error;
1403        }
1404
1405        if (orig->replay_esn) {
1406                if (xfrm_replay_clone(x, orig))
1407                        goto error;
1408        }
1409
1410        memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1411
1412        if (xfrm_init_state(x) < 0)
1413                goto error;
1414
1415        x->props.flags = orig->props.flags;
1416        x->props.extra_flags = orig->props.extra_flags;
1417
1418        x->if_id = orig->if_id;
1419        x->tfcpad = orig->tfcpad;
1420        x->replay_maxdiff = orig->replay_maxdiff;
1421        x->replay_maxage = orig->replay_maxage;
1422        x->curlft.add_time = orig->curlft.add_time;
1423        x->km.state = orig->km.state;
1424        x->km.seq = orig->km.seq;
1425        x->replay = orig->replay;
1426        x->preplay = orig->preplay;
1427
1428        return x;
1429
1430 error:
1431        xfrm_state_put(x);
1432out:
1433        return NULL;
1434}
1435
1436struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
1437{
1438        unsigned int h;
1439        struct xfrm_state *x = NULL;
1440
1441        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1442
1443        if (m->reqid) {
1444                h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
1445                                  m->reqid, m->old_family);
1446                hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1447                        if (x->props.mode != m->mode ||
1448                            x->id.proto != m->proto)
1449                                continue;
1450                        if (m->reqid && x->props.reqid != m->reqid)
1451                                continue;
1452                        if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1453                                             m->old_family) ||
1454                            !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1455                                             m->old_family))
1456                                continue;
1457                        xfrm_state_hold(x);
1458                        break;
1459                }
1460        } else {
1461                h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
1462                                  m->old_family);
1463                hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
1464                        if (x->props.mode != m->mode ||
1465                            x->id.proto != m->proto)
1466                                continue;
1467                        if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1468                                             m->old_family) ||
1469                            !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1470                                             m->old_family))
1471                                continue;
1472                        xfrm_state_hold(x);
1473                        break;
1474                }
1475        }
1476
1477        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1478
1479        return x;
1480}
1481EXPORT_SYMBOL(xfrm_migrate_state_find);
1482
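/* Clone @x, rewrite the clone's addresses to the migration target and
 * insert it (a plain insert if the destination address is unchanged,
 * otherwise a full xfrm_state_add()).
 */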
1483struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
1484                                      struct xfrm_migrate *m,
1485                                      struct xfrm_encap_tmpl *encap)
1486{
1487        struct xfrm_state *xc;
1488
1489        xc = xfrm_state_clone(x, encap);
1490        if (!xc)
1491                return NULL;
1492
1493        memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1494        memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1495
1496        /* add state */
1497        if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
1498                /* care is needed when the destination address of the
1499                   state is to be updated, as it is part of the lookup triplet */
1500                xfrm_state_insert(xc);
1501        } else {
1502                if (xfrm_state_add(xc) < 0)
1503                        goto error;
1504        }
1505
1506        return xc;
1507error:
1508        xfrm_state_put(xc);
1509        return NULL;
1510}
1511EXPORT_SYMBOL(xfrm_state_migrate);
1512#endif
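/*
 * Usage sketch (added for illustration, not part of the original file):
 * the two migrate helpers above are meant to be used together, as the
 * CONFIG_XFRM_MIGRATE code in xfrm_policy.c does: look up the SA on the
 * old addresses, clone it onto the new ones, and let the caller decide
 * when to retire the old SA.  Error handling is omitted:
 *
 *	struct xfrm_state *x, *xc;
 *
 *	x = xfrm_migrate_state_find(m, net);
 *	if (x) {
 *		xc = xfrm_state_migrate(x, m, encap);
 *		if (xc)
 *			xfrm_state_put(xc);
 *		xfrm_state_put(x);
 *	}
 */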
1513
1514int xfrm_state_update(struct xfrm_state *x)
1515{
1516        struct xfrm_state *x1, *to_put;
1517        int err;
1518        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1519        struct net *net = xs_net(x);
1520
1521        to_put = NULL;
1522
1523        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1524        x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1525
1526        err = -ESRCH;
1527        if (!x1)
1528                goto out;
1529
1530        if (xfrm_state_kern(x1)) {
1531                to_put = x1;
1532                err = -EEXIST;
1533                goto out;
1534        }
1535
1536        if (x1->km.state == XFRM_STATE_ACQ) {
1537                __xfrm_state_insert(x);
1538                x = NULL;
1539        }
1540        err = 0;
1541
1542out:
1543        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1544
1545        if (to_put)
1546                xfrm_state_put(to_put);
1547
1548        if (err)
1549                return err;
1550
1551        if (!x) {
1552                xfrm_state_delete(x1);
1553                xfrm_state_put(x1);
1554                return 0;
1555        }
1556
1557        err = -EINVAL;
1558        spin_lock_bh(&x1->lock);
1559        if (likely(x1->km.state == XFRM_STATE_VALID)) {
1560                if (x->encap && x1->encap &&
1561                    x->encap->encap_type == x1->encap->encap_type)
1562                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1563                else if (x->encap || x1->encap)
1564                        goto fail;
1565
1566                if (x->coaddr && x1->coaddr) {
1567                        memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1568                }
1569                if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1570                        memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1571                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1572                x1->km.dying = 0;
1573
1574                tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1575                if (x1->curlft.use_time)
1576                        xfrm_state_check_expire(x1);
1577
1578                if (x->props.smark.m || x->props.smark.v || x->if_id) {
1579                        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1580
1581                        if (x->props.smark.m || x->props.smark.v)
1582                                x1->props.smark = x->props.smark;
1583
1584                        if (x->if_id)
1585                                x1->if_id = x->if_id;
1586
1587                        __xfrm_state_bump_genids(x1);
1588                        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1589                }
1590
1591                err = 0;
1592                x->km.state = XFRM_STATE_DEAD;
1593                __xfrm_state_put(x);
1594        }
1595
1596fail:
1597        spin_unlock_bh(&x1->lock);
1598
1599        xfrm_state_put(x1);
1600
1601        return err;
1602}
1603EXPORT_SYMBOL(xfrm_state_update);
1604
1605int xfrm_state_check_expire(struct xfrm_state *x)
1606{
1607        if (!x->curlft.use_time)
1608                x->curlft.use_time = ktime_get_real_seconds();
1609
1610        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1611            x->curlft.packets >= x->lft.hard_packet_limit) {
1612                x->km.state = XFRM_STATE_EXPIRED;
1613                tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
1614                return -EINVAL;
1615        }
1616
1617        if (!x->km.dying &&
1618            (x->curlft.bytes >= x->lft.soft_byte_limit ||
1619             x->curlft.packets >= x->lft.soft_packet_limit)) {
1620                x->km.dying = 1;
1621                km_state_expired(x, 0, 0);
1622        }
1623        return 0;
1624}
1625EXPORT_SYMBOL(xfrm_state_check_expire);
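/*
 * Worked example (added for illustration): with lft.hard_byte_limit set to
 * 1000000 and curlft.bytes reaching 1000000, the first branch above fires:
 * km.state becomes XFRM_STATE_EXPIRED, the state timer is kicked to run
 * immediately and -EINVAL is returned so the caller stops using the SA.
 * If only the soft limit had been crossed, km.dying is set once and
 * km_state_expired(x, 0, 0) asks the key managers to negotiate a
 * replacement while the current SA keeps working.
 */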
1626
1627struct xfrm_state *
1628xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
1629                  u8 proto, unsigned short family)
1630{
1631        struct xfrm_state *x;
1632
1633        rcu_read_lock();
1634        x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1635        rcu_read_unlock();
1636        return x;
1637}
1638EXPORT_SYMBOL(xfrm_state_lookup);
1639
1640struct xfrm_state *
1641xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1642                         const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1643                         u8 proto, unsigned short family)
1644{
1645        struct xfrm_state *x;
1646
1647        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1648        x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
1649        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1650        return x;
1651}
1652EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1653
1654struct xfrm_state *
1655xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1656              u32 if_id, u8 proto, const xfrm_address_t *daddr,
1657              const xfrm_address_t *saddr, int create, unsigned short family)
1658{
1659        struct xfrm_state *x;
1660
1661        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1662        x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
1663        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1664
1665        return x;
1666}
1667EXPORT_SYMBOL(xfrm_find_acq);
1668
1669#ifdef CONFIG_XFRM_SUB_POLICY
1670int
1671xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1672               unsigned short family, struct net *net)
1673{
1674        int i;
1675        int err = 0;
1676        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1677        if (!afinfo)
1678                return -EAFNOSUPPORT;
1679
1680        spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
1681        if (afinfo->tmpl_sort)
1682                err = afinfo->tmpl_sort(dst, src, n);
1683        else
1684                for (i = 0; i < n; i++)
1685                        dst[i] = src[i];
1686        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1687        rcu_read_unlock();
1688        return err;
1689}
1690EXPORT_SYMBOL(xfrm_tmpl_sort);
1691
1692int
1693xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1694                unsigned short family)
1695{
1696        int i;
1697        int err = 0;
1698        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1699        struct net *net = xs_net(*src);
1700
1701        if (!afinfo)
1702                return -EAFNOSUPPORT;
1703
1704        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1705        if (afinfo->state_sort)
1706                err = afinfo->state_sort(dst, src, n);
1707        else
1708                for (i = 0; i < n; i++)
1709                        dst[i] = src[i];
1710        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1711        rcu_read_unlock();
1712        return err;
1713}
1714EXPORT_SYMBOL(xfrm_state_sort);
1715#endif
1716
1717/* Silly linear scan, but it is not worth building a resolution list here. */
1718
1719static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1720{
1721        int i;
1722
1723        for (i = 0; i <= net->xfrm.state_hmask; i++) {
1724                struct xfrm_state *x;
1725
1726                hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
1727                        if (x->km.seq == seq &&
1728                            (mark & x->mark.m) == x->mark.v &&
1729                            x->km.state == XFRM_STATE_ACQ) {
1730                                xfrm_state_hold(x);
1731                                return x;
1732                        }
1733                }
1734        }
1735        return NULL;
1736}
1737
1738struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1739{
1740        struct xfrm_state *x;
1741
1742        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1743        x = __xfrm_find_acq_byseq(net, mark, seq);
1744        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1745        return x;
1746}
1747EXPORT_SYMBOL(xfrm_find_acq_byseq);
1748
1749u32 xfrm_get_acqseq(void)
1750{
1751        u32 res;
1752        static atomic_t acqseq;
1753
1754        do {
1755                res = atomic_inc_return(&acqseq);
1756        } while (!res);
1757
1758        return res;
1759}
1760EXPORT_SYMBOL(xfrm_get_acqseq);
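/*
 * Note (added for clarity): the loop above deliberately skips 0 on counter
 * wrap-around, so an acquire sequence number of 0 can be treated as
 * "unset" by callers of __xfrm_find_acq_byseq().
 */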
1761
1762int verify_spi_info(u8 proto, u32 min, u32 max)
1763{
1764        switch (proto) {
1765        case IPPROTO_AH:
1766        case IPPROTO_ESP:
1767                break;
1768
1769        case IPPROTO_COMP:
1770                /* IPCOMP SPI is 16 bits. */
1771                if (max >= 0x10000)
1772                        return -EINVAL;
1773                break;
1774
1775        default:
1776                return -EINVAL;
1777        }
1778
1779        if (min > max)
1780                return -EINVAL;
1781
1782        return 0;
1783}
1784EXPORT_SYMBOL(verify_spi_info);
1785
1786int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
1787{
1788        struct net *net = xs_net(x);
1789        unsigned int h;
1790        struct xfrm_state *x0;
1791        int err = -ENOENT;
1792        __be32 minspi = htonl(low);
1793        __be32 maxspi = htonl(high);
1794        u32 mark = x->mark.v & x->mark.m;
1795
1796        spin_lock_bh(&x->lock);
1797        if (x->km.state == XFRM_STATE_DEAD)
1798                goto unlock;
1799
1800        err = 0;
1801        if (x->id.spi)
1802                goto unlock;
1803
1804        err = -ENOENT;
1805
1806        if (minspi == maxspi) {
1807                x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
1808                if (x0) {
1809                        xfrm_state_put(x0);
1810                        goto unlock;
1811                }
1812                x->id.spi = minspi;
1813        } else {
1814                u32 spi = 0;
1815                for (h = 0; h < high-low+1; h++) {
1816                        spi = low + prandom_u32()%(high-low+1);
1817                        x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1818                        if (x0 == NULL) {
1819                                x->id.spi = htonl(spi);
1820                                break;
1821                        }
1822                        xfrm_state_put(x0);
1823                }
1824        }
1825        if (x->id.spi) {
1826                spin_lock_bh(&net->xfrm.xfrm_state_lock);
1827                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1828                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1829                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1830
1831                err = 0;
1832        }
1833
1834unlock:
1835        spin_unlock_bh(&x->lock);
1836
1837        return err;
1838}
1839EXPORT_SYMBOL(xfrm_alloc_spi);
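/*
 * Usage sketch (added for illustration): a key manager typically validates
 * the requested range first and then lets the kernel pick an SPI.  The
 * range values below are arbitrary examples:
 *
 *	err = verify_spi_info(x->id.proto, 0x100, 0x0fffffff);
 *	if (!err)
 *		err = xfrm_alloc_spi(x, 0x100, 0x0fffffff);
 *
 * On success x->id.spi is set and the state is hashed into state_byspi;
 * -ENOENT means the state was dead or no free SPI was found in the range.
 */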
1840
1841static bool __xfrm_state_filter_match(struct xfrm_state *x,
1842                                      struct xfrm_address_filter *filter)
1843{
1844        if (filter) {
1845                if ((filter->family == AF_INET ||
1846                     filter->family == AF_INET6) &&
1847                    x->props.family != filter->family)
1848                        return false;
1849
1850                return addr_match(&x->props.saddr, &filter->saddr,
1851                                  filter->splen) &&
1852                       addr_match(&x->id.daddr, &filter->daddr,
1853                                  filter->dplen);
1854        }
1855        return true;
1856}
1857
1858int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
1859                    int (*func)(struct xfrm_state *, int, void*),
1860                    void *data)
1861{
1862        struct xfrm_state *state;
1863        struct xfrm_state_walk *x;
1864        int err = 0;
1865
1866        if (walk->seq != 0 && list_empty(&walk->all))
1867                return 0;
1868
1869        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1870        if (list_empty(&walk->all))
1871                x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
1872        else
1873                x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
1874        list_for_each_entry_from(x, &net->xfrm.state_all, all) {
1875                if (x->state == XFRM_STATE_DEAD)
1876                        continue;
1877                state = container_of(x, struct xfrm_state, km);
1878                if (!xfrm_id_proto_match(state->id.proto, walk->proto))
1879                        continue;
1880                if (!__xfrm_state_filter_match(state, walk->filter))
1881                        continue;
1882                err = func(state, walk->seq, data);
1883                if (err) {
1884                        list_move_tail(&walk->all, &x->all);
1885                        goto out;
1886                }
1887                walk->seq++;
1888        }
1889        if (walk->seq == 0) {
1890                err = -ENOENT;
1891                goto out;
1892        }
1893        list_del_init(&walk->all);
1894out:
1895        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1896        return err;
1897}
1898EXPORT_SYMBOL(xfrm_state_walk);
1899
1900void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
1901                          struct xfrm_address_filter *filter)
1902{
1903        INIT_LIST_HEAD(&walk->all);
1904        walk->proto = proto;
1905        walk->state = XFRM_STATE_DEAD;
1906        walk->seq = 0;
1907        walk->filter = filter;
1908}
1909EXPORT_SYMBOL(xfrm_state_walk_init);
1910
1911void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
1912{
1913        kfree(walk->filter);
1914
1915        if (list_empty(&walk->all))
1916                return;
1917
1918        spin_lock_bh(&net->xfrm.xfrm_state_lock);
1919        list_del(&walk->all);
1920        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1921}
1922EXPORT_SYMBOL(xfrm_state_walk_done);
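/*
 * Usage sketch of the walk API above (added for illustration): dump-style
 * callers initialise a walker, feed every matching SA to a callback and
 * tear the walker down.  dump_one_state() is a hypothetical callback:
 *
 *	static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 *	{
 *		// emit one SA; a non-zero return stops the walk and the
 *		// position is kept so the next call resumes after this entry
 *		return 0;
 *	}
 *
 *	struct xfrm_state_walk walk;
 *
 *	xfrm_state_walk_init(&walk, IPSEC_PROTO_ANY, NULL);
 *	xfrm_state_walk(net, &walk, dump_one_state, NULL);
 *	xfrm_state_walk_done(&walk, net);
 */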
1923
1924static void xfrm_replay_timer_handler(struct timer_list *t)
1925{
1926        struct xfrm_state *x = from_timer(x, t, rtimer);
1927
1928        spin_lock(&x->lock);
1929
1930        if (x->km.state == XFRM_STATE_VALID) {
1931                if (xfrm_aevent_is_on(xs_net(x)))
1932                        x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
1933                else
1934                        x->xflags |= XFRM_TIME_DEFER;
1935        }
1936
1937        spin_unlock(&x->lock);
1938}
1939
1940static LIST_HEAD(xfrm_km_list);
1941
1942void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
1943{
1944        struct xfrm_mgr *km;
1945
1946        rcu_read_lock();
1947        list_for_each_entry_rcu(km, &xfrm_km_list, list)
1948                if (km->notify_policy)
1949                        km->notify_policy(xp, dir, c);
1950        rcu_read_unlock();
1951}
1952
1953void km_state_notify(struct xfrm_state *x, const struct km_event *c)
1954{
1955        struct xfrm_mgr *km;
1956        rcu_read_lock();
1957        list_for_each_entry_rcu(km, &xfrm_km_list, list)
1958                if (km->notify)
1959                        km->notify(x, c);
1960        rcu_read_unlock();
1961}
1962
1963EXPORT_SYMBOL(km_policy_notify);
1964EXPORT_SYMBOL(km_state_notify);
1965
1966void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
1967{
1968        struct km_event c;
1969
1970        c.data.hard = hard;
1971        c.portid = portid;
1972        c.event = XFRM_MSG_EXPIRE;
1973        km_state_notify(x, &c);
1974}
1975
1976EXPORT_SYMBOL(km_state_expired);
1977/*
1978 * We send to all registered managers regardless of failures;
1979 * we are happy with one success.
1980 */
1981int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1982{
1983        int err = -EINVAL, acqret;
1984        struct xfrm_mgr *km;
1985
1986        rcu_read_lock();
1987        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1988                acqret = km->acquire(x, t, pol);
1989                if (!acqret)
1990                        err = acqret;
1991        }
1992        rcu_read_unlock();
1993        return err;
1994}
1995EXPORT_SYMBOL(km_query);
1996
1997int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1998{
1999        int err = -EINVAL;
2000        struct xfrm_mgr *km;
2001
2002        rcu_read_lock();
2003        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2004                if (km->new_mapping)
2005                        err = km->new_mapping(x, ipaddr, sport);
2006                if (!err)
2007                        break;
2008        }
2009        rcu_read_unlock();
2010        return err;
2011}
2012EXPORT_SYMBOL(km_new_mapping);
2013
2014void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
2015{
2016        struct km_event c;
2017
2018        c.data.hard = hard;
2019        c.portid = portid;
2020        c.event = XFRM_MSG_POLEXPIRE;
2021        km_policy_notify(pol, dir, &c);
2022}
2023EXPORT_SYMBOL(km_policy_expired);
2024
2025#ifdef CONFIG_XFRM_MIGRATE
2026int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2027               const struct xfrm_migrate *m, int num_migrate,
2028               const struct xfrm_kmaddress *k,
2029               const struct xfrm_encap_tmpl *encap)
2030{
2031        int err = -EINVAL;
2032        int ret;
2033        struct xfrm_mgr *km;
2034
2035        rcu_read_lock();
2036        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2037                if (km->migrate) {
2038                        ret = km->migrate(sel, dir, type, m, num_migrate, k,
2039                                          encap);
2040                        if (!ret)
2041                                err = ret;
2042                }
2043        }
2044        rcu_read_unlock();
2045        return err;
2046}
2047EXPORT_SYMBOL(km_migrate);
2048#endif
2049
2050int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
2051{
2052        int err = -EINVAL;
2053        int ret;
2054        struct xfrm_mgr *km;
2055
2056        rcu_read_lock();
2057        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2058                if (km->report) {
2059                        ret = km->report(net, proto, sel, addr);
2060                        if (!ret)
2061                                err = ret;
2062                }
2063        }
2064        rcu_read_unlock();
2065        return err;
2066}
2067EXPORT_SYMBOL(km_report);
2068
2069bool km_is_alive(const struct km_event *c)
2070{
2071        struct xfrm_mgr *km;
2072        bool is_alive = false;
2073
2074        rcu_read_lock();
2075        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2076                if (km->is_alive && km->is_alive(c)) {
2077                        is_alive = true;
2078                        break;
2079                }
2080        }
2081        rcu_read_unlock();
2082
2083        return is_alive;
2084}
2085EXPORT_SYMBOL(km_is_alive);
2086
2087int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
2088{
2089        int err;
2090        u8 *data;
2091        struct xfrm_mgr *km;
2092        struct xfrm_policy *pol = NULL;
2093
2094        if (in_compat_syscall())
2095                return -EOPNOTSUPP;
2096
2097        if (!optval && !optlen) {
2098                xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
2099                xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
2100                __sk_dst_reset(sk);
2101                return 0;
2102        }
2103
2104        if (optlen <= 0 || optlen > PAGE_SIZE)
2105                return -EMSGSIZE;
2106
2107        data = memdup_user(optval, optlen);
2108        if (IS_ERR(data))
2109                return PTR_ERR(data);
2110
2111        err = -EINVAL;
2112        rcu_read_lock();
2113        list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2114                pol = km->compile_policy(sk, optname, data,
2115                                         optlen, &err);
2116                if (err >= 0)
2117                        break;
2118        }
2119        rcu_read_unlock();
2120
2121        if (err >= 0) {
2122                xfrm_sk_policy_insert(sk, err, pol);
2123                xfrm_pol_put(pol);
2124                __sk_dst_reset(sk);
2125                err = 0;
2126        }
2127
2128        kfree(data);
2129        return err;
2130}
2131EXPORT_SYMBOL(xfrm_user_policy);
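/*
 * Note (added for illustration): this is the kernel side of the per-socket
 * policy option.  From userspace the data typically arrives via
 * setsockopt(fd, IPPROTO_IP, IP_XFRM_POLICY, buf, len) (or the IPv6
 * equivalent), and one of the registered key managers compiles the buffer
 * into a struct xfrm_policy.  Passing a NULL buffer with zero length
 * clears both per-socket policies, as the (!optval && !optlen) branch
 * above shows.
 */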
2132
2133static DEFINE_SPINLOCK(xfrm_km_lock);
2134
2135int xfrm_register_km(struct xfrm_mgr *km)
2136{
2137        spin_lock_bh(&xfrm_km_lock);
2138        list_add_tail_rcu(&km->list, &xfrm_km_list);
2139        spin_unlock_bh(&xfrm_km_lock);
2140        return 0;
2141}
2142EXPORT_SYMBOL(xfrm_register_km);
2143
2144int xfrm_unregister_km(struct xfrm_mgr *km)
2145{
2146        spin_lock_bh(&xfrm_km_lock);
2147        list_del_rcu(&km->list);
2148        spin_unlock_bh(&xfrm_km_lock);
2149        synchronize_rcu();
2150        return 0;
2151}
2152EXPORT_SYMBOL(xfrm_unregister_km);
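/*
 * Registration sketch (added for illustration): af_key and xfrm_user are
 * the in-tree users of this pair.  A hypothetical minimal key manager must
 * at least provide the callbacks this file invokes without a NULL check,
 * i.e. ->acquire (km_query()) and ->compile_policy (xfrm_user_policy()):
 *
 *	static int my_acquire(struct xfrm_state *x, struct xfrm_tmpl *t,
 *			      struct xfrm_policy *xp)
 *	{
 *		return -EINVAL;		// never resolves acquires
 *	}
 *
 *	static struct xfrm_policy *my_compile(struct sock *sk, int opt,
 *					      u8 *data, int len, int *dir)
 *	{
 *		*dir = -EINVAL;		// does not parse socket policies
 *		return NULL;
 *	}
 *
 *	static struct xfrm_mgr my_km = {
 *		.acquire	= my_acquire,
 *		.compile_policy	= my_compile,
 *	};
 *
 *	// on module init / exit:
 *	xfrm_register_km(&my_km);
 *	...
 *	xfrm_unregister_km(&my_km);
 */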
2153
2154int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
2155{
2156        int err = 0;
2157
2158        if (WARN_ON(afinfo->family >= NPROTO))
2159                return -EAFNOSUPPORT;
2160
2161        spin_lock_bh(&xfrm_state_afinfo_lock);
2162        if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
2163                err = -EEXIST;
2164        else
2165                rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
2166        spin_unlock_bh(&xfrm_state_afinfo_lock);
2167        return err;
2168}
2169EXPORT_SYMBOL(xfrm_state_register_afinfo);
2170
2171int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
2172{
2173        int err = 0, family = afinfo->family;
2174
2175        if (WARN_ON(family >= NPROTO))
2176                return -EAFNOSUPPORT;
2177
2178        spin_lock_bh(&xfrm_state_afinfo_lock);
2179        if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
2180                if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
2181                        err = -EINVAL;
2182                else
2183                        RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
2184        }
2185        spin_unlock_bh(&xfrm_state_afinfo_lock);
2186        synchronize_rcu();
2187        return err;
2188}
2189EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
2190
2191struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
2192{
2193        if (unlikely(family >= NPROTO))
2194                return NULL;
2195
2196        return rcu_dereference(xfrm_state_afinfo[family]);
2197}
2198
2199struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
2200{
2201        struct xfrm_state_afinfo *afinfo;
2202        if (unlikely(family >= NPROTO))
2203                return NULL;
2204        rcu_read_lock();
2205        afinfo = rcu_dereference(xfrm_state_afinfo[family]);
2206        if (unlikely(!afinfo))
2207                rcu_read_unlock();
2208        return afinfo;
2209}
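/*
 * Note (added for clarity): unlike xfrm_state_afinfo_get_rcu(), a
 * successful xfrm_state_get_afinfo() returns with rcu_read_lock() still
 * held; the caller must drop it, as xfrm_tmpl_sort(), xfrm_state_sort()
 * and __xfrm_init_state() do with rcu_read_unlock().
 */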
2210
2211void xfrm_flush_gc(void)
2212{
2213        flush_work(&xfrm_state_gc_work);
2214}
2215EXPORT_SYMBOL(xfrm_flush_gc);
2216
2217/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
2218void xfrm_state_delete_tunnel(struct xfrm_state *x)
2219{
2220        if (x->tunnel) {
2221                struct xfrm_state *t = x->tunnel;
2222
2223                if (atomic_read(&t->tunnel_users) == 2)
2224                        xfrm_state_delete(t);
2225                atomic_dec(&t->tunnel_users);
2226                xfrm_state_put_sync(t);
2227                x->tunnel = NULL;
2228        }
2229}
2230EXPORT_SYMBOL(xfrm_state_delete_tunnel);
2231
2232int xfrm_state_mtu(struct xfrm_state *x, int mtu)
2233{
2234        const struct xfrm_type *type = READ_ONCE(x->type);
2235
2236        if (x->km.state == XFRM_STATE_VALID &&
2237            type && type->get_mtu)
2238                return type->get_mtu(x, mtu);
2239
2240        return mtu - x->props.header_len;
2241}
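/*
 * Worked example (added for illustration): for a valid state whose type
 * does not implement ->get_mtu, the fallback simply subtracts the header
 * overhead, e.g. xfrm_state_mtu(x, 1500) returns 1452 when
 * x->props.header_len is 48.
 */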
2242
2243int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
2244{
2245        struct xfrm_state_afinfo *afinfo;
2246        struct xfrm_mode *inner_mode;
2247        int family = x->props.family;
2248        int err;
2249
2250        err = -EAFNOSUPPORT;
2251        afinfo = xfrm_state_get_afinfo(family);
2252        if (!afinfo)
2253                goto error;
2254
2255        err = 0;
2256        if (afinfo->init_flags)
2257                err = afinfo->init_flags(x);
2258
2259        rcu_read_unlock();
2260
2261        if (err)
2262                goto error;
2263
2264        err = -EPROTONOSUPPORT;
2265
2266        if (x->sel.family != AF_UNSPEC) {
2267                inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
2268                if (inner_mode == NULL)
2269                        goto error;
2270
2271                if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2272                    family != x->sel.family) {
2273                        xfrm_put_mode(inner_mode);
2274                        goto error;
2275                }
2276
2277                x->inner_mode = inner_mode;
2278        } else {
2279                struct xfrm_mode *inner_mode_iaf;
2280                int iafamily = AF_INET;
2281
2282                inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
2283                if (inner_mode == NULL)
2284                        goto error;
2285
2286                if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
2287                        xfrm_put_mode(inner_mode);
2288                        goto error;
2289                }
2290                x->inner_mode = inner_mode;
2291
2292                if (x->props.family == AF_INET)
2293                        iafamily = AF_INET6;
2294
2295                inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
2296                if (inner_mode_iaf) {
2297                        if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
2298                                x->inner_mode_iaf = inner_mode_iaf;
2299                        else
2300                                xfrm_put_mode(inner_mode_iaf);
2301                }
2302        }
2303
2304        x->type = xfrm_get_type(x->id.proto, family);
2305        if (x->type == NULL)
2306                goto error;
2307
2308        x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
2309
2310        err = x->type->init_state(x);
2311        if (err)
2312                goto error;
2313
2314        x->outer_mode = xfrm_get_mode(x->props.mode, family);
2315        if (x->outer_mode == NULL) {
2316                err = -EPROTONOSUPPORT;
2317                goto error;
2318        }
2319
2320        if (init_replay) {
2321                err = xfrm_init_replay(x);
2322                if (err)
2323                        goto error;
2324        }
2325
2326error:
2327        return err;
2328}
2329
2330EXPORT_SYMBOL(__xfrm_init_state);
2331
2332int xfrm_init_state(struct xfrm_state *x)
2333{
2334        int err;
2335
2336        err = __xfrm_init_state(x, true, false);
2337        if (!err)
2338                x->km.state = XFRM_STATE_VALID;
2339
2340        return err;
2341}
2342
2343EXPORT_SYMBOL(xfrm_init_state);
2344
2345int __net_init xfrm_state_init(struct net *net)
2346{
2347        unsigned int sz;
2348
2349        if (net_eq(net, &init_net))
2350                xfrm_state_cache = KMEM_CACHE(xfrm_state,
2351                                              SLAB_HWCACHE_ALIGN | SLAB_PANIC);
2352
2353        INIT_LIST_HEAD(&net->xfrm.state_all);
2354
2355        sz = sizeof(struct hlist_head) * 8;
2356
2357        net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2358        if (!net->xfrm.state_bydst)
2359                goto out_bydst;
2360        net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2361        if (!net->xfrm.state_bysrc)
2362                goto out_bysrc;
2363        net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2364        if (!net->xfrm.state_byspi)
2365                goto out_byspi;
2366        net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2367
2368        net->xfrm.state_num = 0;
2369        INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2370        spin_lock_init(&net->xfrm.xfrm_state_lock);
2371        return 0;
2372
2373out_byspi:
2374        xfrm_hash_free(net->xfrm.state_bysrc, sz);
2375out_bysrc:
2376        xfrm_hash_free(net->xfrm.state_bydst, sz);
2377out_bydst:
2378        return -ENOMEM;
2379}
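/*
 * Sizing note (added for clarity): the initial hash tables hold
 * sz / sizeof(struct hlist_head) == 8 buckets each, so state_hmask starts
 * at 7; xfrm_hash_resize(), scheduled through state_hash_work, grows the
 * tables later as the number of states increases.
 */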
2380
2381void xfrm_state_fini(struct net *net)
2382{
2383        unsigned int sz;
2384
2385        flush_work(&net->xfrm.state_hash_work);
2386        flush_work(&xfrm_state_gc_work);
2387        xfrm_state_flush(net, 0, false, true);
2388
2389        WARN_ON(!list_empty(&net->xfrm.state_all));
2390
2391        sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2392        WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2393        xfrm_hash_free(net->xfrm.state_byspi, sz);
2394        WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2395        xfrm_hash_free(net->xfrm.state_bysrc, sz);
2396        WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2397        xfrm_hash_free(net->xfrm.state_bydst, sz);
2398}
2399
2400#ifdef CONFIG_AUDITSYSCALL
2401static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2402                                     struct audit_buffer *audit_buf)
2403{
2404        struct xfrm_sec_ctx *ctx = x->security;
2405        u32 spi = ntohl(x->id.spi);
2406
2407        if (ctx)
2408                audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2409                                 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2410
2411        switch (x->props.family) {
2412        case AF_INET:
2413                audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2414                                 &x->props.saddr.a4, &x->id.daddr.a4);
2415                break;
2416        case AF_INET6:
2417                audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2418                                 x->props.saddr.a6, x->id.daddr.a6);
2419                break;
2420        }
2421
2422        audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2423}
2424
2425static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2426                                      struct audit_buffer *audit_buf)
2427{
2428        const struct iphdr *iph4;
2429        const struct ipv6hdr *iph6;
2430
2431        switch (family) {
2432        case AF_INET:
2433                iph4 = ip_hdr(skb);
2434                audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2435                                 &iph4->saddr, &iph4->daddr);
2436                break;
2437        case AF_INET6:
2438                iph6 = ipv6_hdr(skb);
2439                audit_log_format(audit_buf,
2440                                 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2441                                 &iph6->saddr, &iph6->daddr,
2442                                 iph6->flow_lbl[0] & 0x0f,
2443                                 iph6->flow_lbl[1],
2444                                 iph6->flow_lbl[2]);
2445                break;
2446        }
2447}
2448
2449void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
2450{
2451        struct audit_buffer *audit_buf;
2452
2453        audit_buf = xfrm_audit_start("SAD-add");
2454        if (audit_buf == NULL)
2455                return;
2456        xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2457        xfrm_audit_helper_sainfo(x, audit_buf);
2458        audit_log_format(audit_buf, " res=%u", result);
2459        audit_log_end(audit_buf);
2460}
2461EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2462
2463void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
2464{
2465        struct audit_buffer *audit_buf;
2466
2467        audit_buf = xfrm_audit_start("SAD-delete");
2468        if (audit_buf == NULL)
2469                return;
2470        xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2471        xfrm_audit_helper_sainfo(x, audit_buf);
2472        audit_log_format(audit_buf, " res=%u", result);
2473        audit_log_end(audit_buf);
2474}
2475EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2476
2477void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2478                                      struct sk_buff *skb)
2479{
2480        struct audit_buffer *audit_buf;
2481        u32 spi;
2482
2483        audit_buf = xfrm_audit_start("SA-replay-overflow");
2484        if (audit_buf == NULL)
2485                return;
2486        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2487        /* don't record the sequence number because it's inherent in this kind
2488         * of audit message */
2489        spi = ntohl(x->id.spi);
2490        audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2491        audit_log_end(audit_buf);
2492}
2493EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2494
2495void xfrm_audit_state_replay(struct xfrm_state *x,
2496                             struct sk_buff *skb, __be32 net_seq)
2497{
2498        struct audit_buffer *audit_buf;
2499        u32 spi;
2500
2501        audit_buf = xfrm_audit_start("SA-replayed-pkt");
2502        if (audit_buf == NULL)
2503                return;
2504        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2505        spi = ntohl(x->id.spi);
2506        audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2507                         spi, spi, ntohl(net_seq));
2508        audit_log_end(audit_buf);
2509}
2510EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
2511
2512void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2513{
2514        struct audit_buffer *audit_buf;
2515
2516        audit_buf = xfrm_audit_start("SA-notfound");
2517        if (audit_buf == NULL)
2518                return;
2519        xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2520        audit_log_end(audit_buf);
2521}
2522EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2523
2524void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2525                               __be32 net_spi, __be32 net_seq)
2526{
2527        struct audit_buffer *audit_buf;
2528        u32 spi;
2529
2530        audit_buf = xfrm_audit_start("SA-notfound");
2531        if (audit_buf == NULL)
2532                return;
2533        xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2534        spi = ntohl(net_spi);
2535        audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2536                         spi, spi, ntohl(net_seq));
2537        audit_log_end(audit_buf);
2538}
2539EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2540
2541void xfrm_audit_state_icvfail(struct xfrm_state *x,
2542                              struct sk_buff *skb, u8 proto)
2543{
2544        struct audit_buffer *audit_buf;
2545        __be32 net_spi;
2546        __be32 net_seq;
2547
2548        audit_buf = xfrm_audit_start("SA-icv-failure");
2549        if (audit_buf == NULL)
2550                return;
2551        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2552        if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2553                u32 spi = ntohl(net_spi);
2554                audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2555                                 spi, spi, ntohl(net_seq));
2556        }
2557        audit_log_end(audit_buf);
2558}
2559EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2560#endif /* CONFIG_AUDITSYSCALL */
2561