linux/net/xfrm/xfrm_policy.c
<<
>>
Prefs
   1/*
   2 * xfrm_policy.c
   3 *
   4 * Changes:
   5 *      Mitsuru KANDA @USAGI
   6 *      Kazunori MIYAZAWA @USAGI
   7 *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
   8 *              IPv6 support
   9 *      Kazunori MIYAZAWA @USAGI
  10 *      YOSHIFUJI Hideaki
  11 *              Split up af-specific portion
  12 *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
  13 *
  14 */
  15
  16#include <linux/err.h>
  17#include <linux/slab.h>
  18#include <linux/kmod.h>
  19#include <linux/list.h>
  20#include <linux/spinlock.h>
  21#include <linux/workqueue.h>
  22#include <linux/notifier.h>
  23#include <linux/netdevice.h>
  24#include <linux/netfilter.h>
  25#include <linux/module.h>
  26#include <linux/cache.h>
  27#include <linux/audit.h>
  28#include <net/dst.h>
  29#include <net/flow.h>
  30#include <net/xfrm.h>
  31#include <net/ip.h>
  32#ifdef CONFIG_XFRM_STATISTICS
  33#include <net/snmp.h>
  34#endif
  35
  36#include "xfrm_hash.h"
  37
  38#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
  39#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
  40#define XFRM_MAX_QUEUE_LEN      100
  41
  42struct xfrm_flo {
  43        struct dst_entry *dst_orig;
  44        u8 flags;
  45};
  46
  47static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
  48static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
  49                                                __read_mostly;
  50
  51static struct kmem_cache *xfrm_dst_cache __read_mostly;
  52
  53static void xfrm_init_pmtu(struct dst_entry *dst);
  54static int stale_bundle(struct dst_entry *dst);
  55static int xfrm_bundle_ok(struct xfrm_dst *xdst);
  56static void xfrm_policy_queue_process(unsigned long arg);
  57
  58static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
  59static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
  60                                                int dir);
  61
  62static inline bool
  63__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
  64{
  65        const struct flowi4 *fl4 = &fl->u.ip4;
  66
  67        return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
  68                addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
  69                !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
  70                !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
  71                (fl4->flowi4_proto == sel->proto || !sel->proto) &&
  72                (fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
  73}
  74
  75static inline bool
  76__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
  77{
  78        const struct flowi6 *fl6 = &fl->u.ip6;
  79
  80        return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
  81                addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
  82                !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
  83                !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
  84                (fl6->flowi6_proto == sel->proto || !sel->proto) &&
  85                (fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
  86}
  87
  88bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
  89                         unsigned short family)
  90{
  91        switch (family) {
  92        case AF_INET:
  93                return __xfrm4_selector_match(sel, fl);
  94        case AF_INET6:
  95                return __xfrm6_selector_match(sel, fl);
  96        }
  97        return false;
  98}
  99
 100static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
 101{
 102        struct xfrm_policy_afinfo *afinfo;
 103
 104        if (unlikely(family >= NPROTO))
 105                return NULL;
 106        rcu_read_lock();
 107        afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
 108        if (unlikely(!afinfo))
 109                rcu_read_unlock();
 110        return afinfo;
 111}
 112
 113static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
 114{
 115        rcu_read_unlock();
 116}
 117
 118static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
 119                                                  int tos, int oif,
 120                                                  const xfrm_address_t *saddr,
 121                                                  const xfrm_address_t *daddr,
 122                                                  int family)
 123{
 124        struct xfrm_policy_afinfo *afinfo;
 125        struct dst_entry *dst;
 126
 127        afinfo = xfrm_policy_get_afinfo(family);
 128        if (unlikely(afinfo == NULL))
 129                return ERR_PTR(-EAFNOSUPPORT);
 130
 131        dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
 132
 133        xfrm_policy_put_afinfo(afinfo);
 134
 135        return dst;
 136}
 137
 138static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
 139                                                int tos, int oif,
 140                                                xfrm_address_t *prev_saddr,
 141                                                xfrm_address_t *prev_daddr,
 142                                                int family)
 143{
 144        struct net *net = xs_net(x);
 145        xfrm_address_t *saddr = &x->props.saddr;
 146        xfrm_address_t *daddr = &x->id.daddr;
 147        struct dst_entry *dst;
 148
 149        if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
 150                saddr = x->coaddr;
 151                daddr = prev_daddr;
 152        }
 153        if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
 154                saddr = prev_saddr;
 155                daddr = x->coaddr;
 156        }
 157
 158        dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
 159
 160        if (!IS_ERR(dst)) {
 161                if (prev_saddr != saddr)
 162                        memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
 163                if (prev_daddr != daddr)
 164                        memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
 165        }
 166
 167        return dst;
 168}
 169
 170static inline unsigned long make_jiffies(long secs)
 171{
 172        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
 173                return MAX_SCHEDULE_TIMEOUT-1;
 174        else
 175                return secs*HZ;
 176}
 177
 178static void xfrm_policy_timer(unsigned long data)
 179{
 180        struct xfrm_policy *xp = (struct xfrm_policy *)data;
 181        unsigned long now = get_seconds();
 182        long next = LONG_MAX;
 183        int warn = 0;
 184        int dir;
 185
 186        read_lock(&xp->lock);
 187
 188        if (unlikely(xp->walk.dead))
 189                goto out;
 190
 191        dir = xfrm_policy_id2dir(xp->index);
 192
 193        if (xp->lft.hard_add_expires_seconds) {
 194                long tmo = xp->lft.hard_add_expires_seconds +
 195                        xp->curlft.add_time - now;
 196                if (tmo <= 0)
 197                        goto expired;
 198                if (tmo < next)
 199                        next = tmo;
 200        }
 201        if (xp->lft.hard_use_expires_seconds) {
 202                long tmo = xp->lft.hard_use_expires_seconds +
 203                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
 204                if (tmo <= 0)
 205                        goto expired;
 206                if (tmo < next)
 207                        next = tmo;
 208        }
 209        if (xp->lft.soft_add_expires_seconds) {
 210                long tmo = xp->lft.soft_add_expires_seconds +
 211                        xp->curlft.add_time - now;
 212                if (tmo <= 0) {
 213                        warn = 1;
 214                        tmo = XFRM_KM_TIMEOUT;
 215                }
 216                if (tmo < next)
 217                        next = tmo;
 218        }
 219        if (xp->lft.soft_use_expires_seconds) {
 220                long tmo = xp->lft.soft_use_expires_seconds +
 221                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
 222                if (tmo <= 0) {
 223                        warn = 1;
 224                        tmo = XFRM_KM_TIMEOUT;
 225                }
 226                if (tmo < next)
 227                        next = tmo;
 228        }
 229
 230        if (warn)
 231                km_policy_expired(xp, dir, 0, 0);
 232        if (next != LONG_MAX &&
 233            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
 234                xfrm_pol_hold(xp);
 235
 236out:
 237        read_unlock(&xp->lock);
 238        xfrm_pol_put(xp);
 239        return;
 240
 241expired:
 242        read_unlock(&xp->lock);
 243        if (!xfrm_policy_delete(xp, dir))
 244                km_policy_expired(xp, dir, 1, 0);
 245        xfrm_pol_put(xp);
 246}
 247
 248static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
 249{
 250        struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
 251
 252        if (unlikely(pol->walk.dead))
 253                flo = NULL;
 254        else
 255                xfrm_pol_hold(pol);
 256
 257        return flo;
 258}
 259
 260static int xfrm_policy_flo_check(struct flow_cache_object *flo)
 261{
 262        struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
 263
 264        return !pol->walk.dead;
 265}
 266
 267static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
 268{
 269        xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
 270}
 271
 272static const struct flow_cache_ops xfrm_policy_fc_ops = {
 273        .get = xfrm_policy_flo_get,
 274        .check = xfrm_policy_flo_check,
 275        .delete = xfrm_policy_flo_delete,
 276};
 277
 278/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 279 * SPD calls.
 280 */
 281
 282struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 283{
 284        struct xfrm_policy *policy;
 285
 286        policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 287
 288        if (policy) {
 289                write_pnet(&policy->xp_net, net);
 290                INIT_LIST_HEAD(&policy->walk.all);
 291                INIT_HLIST_NODE(&policy->bydst);
 292                INIT_HLIST_NODE(&policy->byidx);
 293                rwlock_init(&policy->lock);
 294                atomic_set(&policy->refcnt, 1);
 295                skb_queue_head_init(&policy->polq.hold_queue);
 296                setup_timer(&policy->timer, xfrm_policy_timer,
 297                                (unsigned long)policy);
 298                setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
 299                            (unsigned long)policy);
 300                policy->flo.ops = &xfrm_policy_fc_ops;
 301        }
 302        return policy;
 303}
 304EXPORT_SYMBOL(xfrm_policy_alloc);
 305
 306static void xfrm_policy_destroy_rcu(struct rcu_head *head)
 307{
 308        struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);
 309
 310        security_xfrm_policy_free(policy->security);
 311        kfree(policy);
 312}
 313
 314/* Destroy xfrm_policy: descendant resources must be released to this moment. */
 315
 316void xfrm_policy_destroy(struct xfrm_policy *policy)
 317{
 318        BUG_ON(!policy->walk.dead);
 319
 320        if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
 321                BUG();
 322
 323        call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
 324}
 325EXPORT_SYMBOL(xfrm_policy_destroy);
 326
 327/* Rule must be locked. Release descentant resources, announce
 328 * entry dead. The rule must be unlinked from lists to the moment.
 329 */
 330
 331static void xfrm_policy_kill(struct xfrm_policy *policy)
 332{
 333        policy->walk.dead = 1;
 334
 335        atomic_inc(&policy->genid);
 336
 337        if (del_timer(&policy->polq.hold_timer))
 338                xfrm_pol_put(policy);
 339        skb_queue_purge(&policy->polq.hold_queue);
 340
 341        if (del_timer(&policy->timer))
 342                xfrm_pol_put(policy);
 343
 344        xfrm_pol_put(policy);
 345}
 346
 347static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 348
 349static inline unsigned int idx_hash(struct net *net, u32 index)
 350{
 351        return __idx_hash(index, net->xfrm.policy_idx_hmask);
 352}
 353
 354/* calculate policy hash thresholds */
 355static void __get_hash_thresh(struct net *net,
 356                              unsigned short family, int dir,
 357                              u8 *dbits, u8 *sbits)
 358{
 359        switch (family) {
 360        case AF_INET:
 361                *dbits = net->xfrm.policy_bydst[dir].dbits4;
 362                *sbits = net->xfrm.policy_bydst[dir].sbits4;
 363                break;
 364
 365        case AF_INET6:
 366                *dbits = net->xfrm.policy_bydst[dir].dbits6;
 367                *sbits = net->xfrm.policy_bydst[dir].sbits6;
 368                break;
 369
 370        default:
 371                *dbits = 0;
 372                *sbits = 0;
 373        }
 374}
 375
 376static struct hlist_head *policy_hash_bysel(struct net *net,
 377                                            const struct xfrm_selector *sel,
 378                                            unsigned short family, int dir)
 379{
 380        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 381        unsigned int hash;
 382        u8 dbits;
 383        u8 sbits;
 384
 385        __get_hash_thresh(net, family, dir, &dbits, &sbits);
 386        hash = __sel_hash(sel, family, hmask, dbits, sbits);
 387
 388        return (hash == hmask + 1 ?
 389                &net->xfrm.policy_inexact[dir] :
 390                net->xfrm.policy_bydst[dir].table + hash);
 391}
 392
 393static struct hlist_head *policy_hash_direct(struct net *net,
 394                                             const xfrm_address_t *daddr,
 395                                             const xfrm_address_t *saddr,
 396                                             unsigned short family, int dir)
 397{
 398        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 399        unsigned int hash;
 400        u8 dbits;
 401        u8 sbits;
 402
 403        __get_hash_thresh(net, family, dir, &dbits, &sbits);
 404        hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
 405
 406        return net->xfrm.policy_bydst[dir].table + hash;
 407}
 408
 409static void xfrm_dst_hash_transfer(struct net *net,
 410                                   struct hlist_head *list,
 411                                   struct hlist_head *ndsttable,
 412                                   unsigned int nhashmask,
 413                                   int dir)
 414{
 415        struct hlist_node *tmp, *entry0 = NULL;
 416        struct xfrm_policy *pol;
 417        unsigned int h0 = 0;
 418        u8 dbits;
 419        u8 sbits;
 420
 421redo:
 422        hlist_for_each_entry_safe(pol, tmp, list, bydst) {
 423                unsigned int h;
 424
 425                __get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
 426                h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
 427                                pol->family, nhashmask, dbits, sbits);
 428                if (!entry0) {
 429                        hlist_del(&pol->bydst);
 430                        hlist_add_head(&pol->bydst, ndsttable+h);
 431                        h0 = h;
 432                } else {
 433                        if (h != h0)
 434                                continue;
 435                        hlist_del(&pol->bydst);
 436                        hlist_add_behind(&pol->bydst, entry0);
 437                }
 438                entry0 = &pol->bydst;
 439        }
 440        if (!hlist_empty(list)) {
 441                entry0 = NULL;
 442                goto redo;
 443        }
 444}
 445
 446static void xfrm_idx_hash_transfer(struct hlist_head *list,
 447                                   struct hlist_head *nidxtable,
 448                                   unsigned int nhashmask)
 449{
 450        struct hlist_node *tmp;
 451        struct xfrm_policy *pol;
 452
 453        hlist_for_each_entry_safe(pol, tmp, list, byidx) {
 454                unsigned int h;
 455
 456                h = __idx_hash(pol->index, nhashmask);
 457                hlist_add_head(&pol->byidx, nidxtable+h);
 458        }
 459}
 460
 461static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
 462{
 463        return ((old_hmask + 1) << 1) - 1;
 464}
 465
 466static void xfrm_bydst_resize(struct net *net, int dir)
 467{
 468        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 469        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 470        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 471        struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
 472        struct hlist_head *ndst = xfrm_hash_alloc(nsize);
 473        int i;
 474
 475        if (!ndst)
 476                return;
 477
 478        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 479
 480        for (i = hmask; i >= 0; i--)
 481                xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
 482
 483        net->xfrm.policy_bydst[dir].table = ndst;
 484        net->xfrm.policy_bydst[dir].hmask = nhashmask;
 485
 486        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 487
 488        xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
 489}
 490
 491static void xfrm_byidx_resize(struct net *net, int total)
 492{
 493        unsigned int hmask = net->xfrm.policy_idx_hmask;
 494        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 495        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 496        struct hlist_head *oidx = net->xfrm.policy_byidx;
 497        struct hlist_head *nidx = xfrm_hash_alloc(nsize);
 498        int i;
 499
 500        if (!nidx)
 501                return;
 502
 503        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 504
 505        for (i = hmask; i >= 0; i--)
 506                xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
 507
 508        net->xfrm.policy_byidx = nidx;
 509        net->xfrm.policy_idx_hmask = nhashmask;
 510
 511        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 512
 513        xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
 514}
 515
 516static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
 517{
 518        unsigned int cnt = net->xfrm.policy_count[dir];
 519        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 520
 521        if (total)
 522                *total += cnt;
 523
 524        if ((hmask + 1) < xfrm_policy_hashmax &&
 525            cnt > hmask)
 526                return 1;
 527
 528        return 0;
 529}
 530
 531static inline int xfrm_byidx_should_resize(struct net *net, int total)
 532{
 533        unsigned int hmask = net->xfrm.policy_idx_hmask;
 534
 535        if ((hmask + 1) < xfrm_policy_hashmax &&
 536            total > hmask)
 537                return 1;
 538
 539        return 0;
 540}
 541
 542void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
 543{
 544        read_lock_bh(&net->xfrm.xfrm_policy_lock);
 545        si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
 546        si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
 547        si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
 548        si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
 549        si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
 550        si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
 551        si->spdhcnt = net->xfrm.policy_idx_hmask;
 552        si->spdhmcnt = xfrm_policy_hashmax;
 553        read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 554}
 555EXPORT_SYMBOL(xfrm_spd_getinfo);
 556
 557static DEFINE_MUTEX(hash_resize_mutex);
 558static void xfrm_hash_resize(struct work_struct *work)
 559{
 560        struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
 561        int dir, total;
 562
 563        mutex_lock(&hash_resize_mutex);
 564
 565        total = 0;
 566        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 567                if (xfrm_bydst_should_resize(net, dir, &total))
 568                        xfrm_bydst_resize(net, dir);
 569        }
 570        if (xfrm_byidx_should_resize(net, total))
 571                xfrm_byidx_resize(net, total);
 572
 573        mutex_unlock(&hash_resize_mutex);
 574}
 575
 576static void xfrm_hash_rebuild(struct work_struct *work)
 577{
 578        struct net *net = container_of(work, struct net,
 579                                       xfrm.policy_hthresh.work);
 580        unsigned int hmask;
 581        struct xfrm_policy *pol;
 582        struct xfrm_policy *policy;
 583        struct hlist_head *chain;
 584        struct hlist_head *odst;
 585        struct hlist_node *newpos;
 586        int i;
 587        int dir;
 588        unsigned seq;
 589        u8 lbits4, rbits4, lbits6, rbits6;
 590
 591        mutex_lock(&hash_resize_mutex);
 592
 593        /* read selector prefixlen thresholds */
 594        do {
 595                seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
 596
 597                lbits4 = net->xfrm.policy_hthresh.lbits4;
 598                rbits4 = net->xfrm.policy_hthresh.rbits4;
 599                lbits6 = net->xfrm.policy_hthresh.lbits6;
 600                rbits6 = net->xfrm.policy_hthresh.rbits6;
 601        } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 602
 603        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 604
 605        /* reset the bydst and inexact table in all directions */
 606        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 607                INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
 608                hmask = net->xfrm.policy_bydst[dir].hmask;
 609                odst = net->xfrm.policy_bydst[dir].table;
 610                for (i = hmask; i >= 0; i--)
 611                        INIT_HLIST_HEAD(odst + i);
 612                if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
 613                        /* dir out => dst = remote, src = local */
 614                        net->xfrm.policy_bydst[dir].dbits4 = rbits4;
 615                        net->xfrm.policy_bydst[dir].sbits4 = lbits4;
 616                        net->xfrm.policy_bydst[dir].dbits6 = rbits6;
 617                        net->xfrm.policy_bydst[dir].sbits6 = lbits6;
 618                } else {
 619                        /* dir in/fwd => dst = local, src = remote */
 620                        net->xfrm.policy_bydst[dir].dbits4 = lbits4;
 621                        net->xfrm.policy_bydst[dir].sbits4 = rbits4;
 622                        net->xfrm.policy_bydst[dir].dbits6 = lbits6;
 623                        net->xfrm.policy_bydst[dir].sbits6 = rbits6;
 624                }
 625        }
 626
 627        /* re-insert all policies by order of creation */
 628        list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
 629                newpos = NULL;
 630                chain = policy_hash_bysel(net, &policy->selector,
 631                                          policy->family,
 632                                          xfrm_policy_id2dir(policy->index));
 633                hlist_for_each_entry(pol, chain, bydst) {
 634                        if (policy->priority >= pol->priority)
 635                                newpos = &pol->bydst;
 636                        else
 637                                break;
 638                }
 639                if (newpos)
 640                        hlist_add_behind(&policy->bydst, newpos);
 641                else
 642                        hlist_add_head(&policy->bydst, chain);
 643        }
 644
 645        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 646
 647        mutex_unlock(&hash_resize_mutex);
 648}
 649
 650void xfrm_policy_hash_rebuild(struct net *net)
 651{
 652        schedule_work(&net->xfrm.policy_hthresh.work);
 653}
 654EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
 655
 656/* Generate new index... KAME seems to generate them ordered by cost
 657 * of an absolute inpredictability of ordering of rules. This will not pass. */
 658static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
 659{
 660        static u32 idx_generator;
 661
 662        for (;;) {
 663                struct hlist_head *list;
 664                struct xfrm_policy *p;
 665                u32 idx;
 666                int found;
 667
 668                if (!index) {
 669                        idx = (idx_generator | dir);
 670                        idx_generator += 8;
 671                } else {
 672                        idx = index;
 673                        index = 0;
 674                }
 675
 676                if (idx == 0)
 677                        idx = 8;
 678                list = net->xfrm.policy_byidx + idx_hash(net, idx);
 679                found = 0;
 680                hlist_for_each_entry(p, list, byidx) {
 681                        if (p->index == idx) {
 682                                found = 1;
 683                                break;
 684                        }
 685                }
 686                if (!found)
 687                        return idx;
 688        }
 689}
 690
 691static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
 692{
 693        u32 *p1 = (u32 *) s1;
 694        u32 *p2 = (u32 *) s2;
 695        int len = sizeof(struct xfrm_selector) / sizeof(u32);
 696        int i;
 697
 698        for (i = 0; i < len; i++) {
 699                if (p1[i] != p2[i])
 700                        return 1;
 701        }
 702
 703        return 0;
 704}
 705
 706static void xfrm_policy_requeue(struct xfrm_policy *old,
 707                                struct xfrm_policy *new)
 708{
 709        struct xfrm_policy_queue *pq = &old->polq;
 710        struct sk_buff_head list;
 711
 712        if (skb_queue_empty(&pq->hold_queue))
 713                return;
 714
 715        __skb_queue_head_init(&list);
 716
 717        spin_lock_bh(&pq->hold_queue.lock);
 718        skb_queue_splice_init(&pq->hold_queue, &list);
 719        if (del_timer(&pq->hold_timer))
 720                xfrm_pol_put(old);
 721        spin_unlock_bh(&pq->hold_queue.lock);
 722
 723        pq = &new->polq;
 724
 725        spin_lock_bh(&pq->hold_queue.lock);
 726        skb_queue_splice(&list, &pq->hold_queue);
 727        pq->timeout = XFRM_QUEUE_TMO_MIN;
 728        if (!mod_timer(&pq->hold_timer, jiffies))
 729                xfrm_pol_hold(new);
 730        spin_unlock_bh(&pq->hold_queue.lock);
 731}
 732
 733static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
 734                                   struct xfrm_policy *pol)
 735{
 736        u32 mark = policy->mark.v & policy->mark.m;
 737
 738        if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
 739                return true;
 740
 741        if ((mark & pol->mark.m) == pol->mark.v &&
 742            policy->priority == pol->priority)
 743                return true;
 744
 745        return false;
 746}
 747
 748int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 749{
 750        struct net *net = xp_net(policy);
 751        struct xfrm_policy *pol;
 752        struct xfrm_policy *delpol;
 753        struct hlist_head *chain;
 754        struct hlist_node *newpos;
 755
 756        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 757        chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
 758        delpol = NULL;
 759        newpos = NULL;
 760        hlist_for_each_entry(pol, chain, bydst) {
 761                if (pol->type == policy->type &&
 762                    !selector_cmp(&pol->selector, &policy->selector) &&
 763                    xfrm_policy_mark_match(policy, pol) &&
 764                    xfrm_sec_ctx_match(pol->security, policy->security) &&
 765                    !WARN_ON(delpol)) {
 766                        if (excl) {
 767                                write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 768                                return -EEXIST;
 769                        }
 770                        delpol = pol;
 771                        if (policy->priority > pol->priority)
 772                                continue;
 773                } else if (policy->priority >= pol->priority) {
 774                        newpos = &pol->bydst;
 775                        continue;
 776                }
 777                if (delpol)
 778                        break;
 779        }
 780        if (newpos)
 781                hlist_add_behind(&policy->bydst, newpos);
 782        else
 783                hlist_add_head(&policy->bydst, chain);
 784        __xfrm_policy_link(policy, dir);
 785        atomic_inc(&net->xfrm.flow_cache_genid);
 786
 787        /* After previous checking, family can either be AF_INET or AF_INET6 */
 788        if (policy->family == AF_INET)
 789                rt_genid_bump_ipv4(net);
 790        else
 791                rt_genid_bump_ipv6(net);
 792
 793        if (delpol) {
 794                xfrm_policy_requeue(delpol, policy);
 795                __xfrm_policy_unlink(delpol, dir);
 796        }
 797        policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
 798        hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
 799        policy->curlft.add_time = get_seconds();
 800        policy->curlft.use_time = 0;
 801        if (!mod_timer(&policy->timer, jiffies + HZ))
 802                xfrm_pol_hold(policy);
 803        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 804
 805        if (delpol)
 806                xfrm_policy_kill(delpol);
 807        else if (xfrm_bydst_should_resize(net, dir, NULL))
 808                schedule_work(&net->xfrm.policy_hash_work);
 809
 810        return 0;
 811}
 812EXPORT_SYMBOL(xfrm_policy_insert);
 813
 814struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 815                                          int dir, struct xfrm_selector *sel,
 816                                          struct xfrm_sec_ctx *ctx, int delete,
 817                                          int *err)
 818{
 819        struct xfrm_policy *pol, *ret;
 820        struct hlist_head *chain;
 821
 822        *err = 0;
 823        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 824        chain = policy_hash_bysel(net, sel, sel->family, dir);
 825        ret = NULL;
 826        hlist_for_each_entry(pol, chain, bydst) {
 827                if (pol->type == type &&
 828                    (mark & pol->mark.m) == pol->mark.v &&
 829                    !selector_cmp(sel, &pol->selector) &&
 830                    xfrm_sec_ctx_match(ctx, pol->security)) {
 831                        xfrm_pol_hold(pol);
 832                        if (delete) {
 833                                *err = security_xfrm_policy_delete(
 834                                                                pol->security);
 835                                if (*err) {
 836                                        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 837                                        return pol;
 838                                }
 839                                __xfrm_policy_unlink(pol, dir);
 840                        }
 841                        ret = pol;
 842                        break;
 843                }
 844        }
 845        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 846
 847        if (ret && delete)
 848                xfrm_policy_kill(ret);
 849        return ret;
 850}
 851EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 852
 853struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 854                                     int dir, u32 id, int delete, int *err)
 855{
 856        struct xfrm_policy *pol, *ret;
 857        struct hlist_head *chain;
 858
 859        *err = -ENOENT;
 860        if (xfrm_policy_id2dir(id) != dir)
 861                return NULL;
 862
 863        *err = 0;
 864        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 865        chain = net->xfrm.policy_byidx + idx_hash(net, id);
 866        ret = NULL;
 867        hlist_for_each_entry(pol, chain, byidx) {
 868                if (pol->type == type && pol->index == id &&
 869                    (mark & pol->mark.m) == pol->mark.v) {
 870                        xfrm_pol_hold(pol);
 871                        if (delete) {
 872                                *err = security_xfrm_policy_delete(
 873                                                                pol->security);
 874                                if (*err) {
 875                                        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 876                                        return pol;
 877                                }
 878                                __xfrm_policy_unlink(pol, dir);
 879                        }
 880                        ret = pol;
 881                        break;
 882                }
 883        }
 884        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 885
 886        if (ret && delete)
 887                xfrm_policy_kill(ret);
 888        return ret;
 889}
 890EXPORT_SYMBOL(xfrm_policy_byid);
 891
 892#ifdef CONFIG_SECURITY_NETWORK_XFRM
 893static inline int
 894xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
 895{
 896        int dir, err = 0;
 897
 898        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 899                struct xfrm_policy *pol;
 900                int i;
 901
 902                hlist_for_each_entry(pol,
 903                                     &net->xfrm.policy_inexact[dir], bydst) {
 904                        if (pol->type != type)
 905                                continue;
 906                        err = security_xfrm_policy_delete(pol->security);
 907                        if (err) {
 908                                xfrm_audit_policy_delete(pol, 0, task_valid);
 909                                return err;
 910                        }
 911                }
 912                for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 913                        hlist_for_each_entry(pol,
 914                                             net->xfrm.policy_bydst[dir].table + i,
 915                                             bydst) {
 916                                if (pol->type != type)
 917                                        continue;
 918                                err = security_xfrm_policy_delete(
 919                                                                pol->security);
 920                                if (err) {
 921                                        xfrm_audit_policy_delete(pol, 0,
 922                                                                 task_valid);
 923                                        return err;
 924                                }
 925                        }
 926                }
 927        }
 928        return err;
 929}
 930#else
 931static inline int
 932xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
 933{
 934        return 0;
 935}
 936#endif
 937
 938int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 939{
 940        int dir, err = 0, cnt = 0;
 941
 942        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 943
 944        err = xfrm_policy_flush_secctx_check(net, type, task_valid);
 945        if (err)
 946                goto out;
 947
 948        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 949                struct xfrm_policy *pol;
 950                int i;
 951
 952        again1:
 953                hlist_for_each_entry(pol,
 954                                     &net->xfrm.policy_inexact[dir], bydst) {
 955                        if (pol->type != type)
 956                                continue;
 957                        __xfrm_policy_unlink(pol, dir);
 958                        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 959                        cnt++;
 960
 961                        xfrm_audit_policy_delete(pol, 1, task_valid);
 962
 963                        xfrm_policy_kill(pol);
 964
 965                        write_lock_bh(&net->xfrm.xfrm_policy_lock);
 966                        goto again1;
 967                }
 968
 969                for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 970        again2:
 971                        hlist_for_each_entry(pol,
 972                                             net->xfrm.policy_bydst[dir].table + i,
 973                                             bydst) {
 974                                if (pol->type != type)
 975                                        continue;
 976                                __xfrm_policy_unlink(pol, dir);
 977                                write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 978                                cnt++;
 979
 980                                xfrm_audit_policy_delete(pol, 1, task_valid);
 981                                xfrm_policy_kill(pol);
 982
 983                                write_lock_bh(&net->xfrm.xfrm_policy_lock);
 984                                goto again2;
 985                        }
 986                }
 987
 988        }
 989        if (!cnt)
 990                err = -ESRCH;
 991out:
 992        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 993        return err;
 994}
 995EXPORT_SYMBOL(xfrm_policy_flush);
 996
 997int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 998                     int (*func)(struct xfrm_policy *, int, int, void*),
 999                     void *data)
1000{
1001        struct xfrm_policy *pol;
1002        struct xfrm_policy_walk_entry *x;
1003        int error = 0;
1004
1005        if (walk->type >= XFRM_POLICY_TYPE_MAX &&
1006            walk->type != XFRM_POLICY_TYPE_ANY)
1007                return -EINVAL;
1008
1009        if (list_empty(&walk->walk.all) && walk->seq != 0)
1010                return 0;
1011
1012        write_lock_bh(&net->xfrm.xfrm_policy_lock);
1013        if (list_empty(&walk->walk.all))
1014                x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
1015        else
1016                x = list_first_entry(&walk->walk.all,
1017                                     struct xfrm_policy_walk_entry, all);
1018
1019        list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
1020                if (x->dead)
1021                        continue;
1022                pol = container_of(x, struct xfrm_policy, walk);
1023                if (walk->type != XFRM_POLICY_TYPE_ANY &&
1024                    walk->type != pol->type)
1025                        continue;
1026                error = func(pol, xfrm_policy_id2dir(pol->index),
1027                             walk->seq, data);
1028                if (error) {
1029                        list_move_tail(&walk->walk.all, &x->all);
1030                        goto out;
1031                }
1032                walk->seq++;
1033        }
1034        if (walk->seq == 0) {
1035                error = -ENOENT;
1036                goto out;
1037        }
1038        list_del_init(&walk->walk.all);
1039out:
1040        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1041        return error;
1042}
1043EXPORT_SYMBOL(xfrm_policy_walk);
1044
1045void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
1046{
1047        INIT_LIST_HEAD(&walk->walk.all);
1048        walk->walk.dead = 1;
1049        walk->type = type;
1050        walk->seq = 0;
1051}
1052EXPORT_SYMBOL(xfrm_policy_walk_init);
1053
1054void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
1055{
1056        if (list_empty(&walk->walk.all))
1057                return;
1058
1059        write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
1060        list_del(&walk->walk.all);
1061        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1062}
1063EXPORT_SYMBOL(xfrm_policy_walk_done);
1064
1065/*
1066 * Find policy to apply to this flow.
1067 *
1068 * Returns 0 if policy found, else an -errno.
1069 */
1070static int xfrm_policy_match(const struct xfrm_policy *pol,
1071                             const struct flowi *fl,
1072                             u8 type, u16 family, int dir)
1073{
1074        const struct xfrm_selector *sel = &pol->selector;
1075        int ret = -ESRCH;
1076        bool match;
1077
1078        if (pol->family != family ||
1079            (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
1080            pol->type != type)
1081                return ret;
1082
1083        match = xfrm_selector_match(sel, fl, family);
1084        if (match)
1085                ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
1086                                                  dir);
1087
1088        return ret;
1089}
1090
1091static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
1092                                                     const struct flowi *fl,
1093                                                     u16 family, u8 dir)
1094{
1095        int err;
1096        struct xfrm_policy *pol, *ret;
1097        const xfrm_address_t *daddr, *saddr;
1098        struct hlist_head *chain;
1099        u32 priority = ~0U;
1100
1101        daddr = xfrm_flowi_daddr(fl, family);
1102        saddr = xfrm_flowi_saddr(fl, family);
1103        if (unlikely(!daddr || !saddr))
1104                return NULL;
1105
1106        read_lock_bh(&net->xfrm.xfrm_policy_lock);
1107        chain = policy_hash_direct(net, daddr, saddr, family, dir);
1108        ret = NULL;
1109        hlist_for_each_entry(pol, chain, bydst) {
1110                err = xfrm_policy_match(pol, fl, type, family, dir);
1111                if (err) {
1112                        if (err == -ESRCH)
1113                                continue;
1114                        else {
1115                                ret = ERR_PTR(err);
1116                                goto fail;
1117                        }
1118                } else {
1119                        ret = pol;
1120                        priority = ret->priority;
1121                        break;
1122                }
1123        }
1124        chain = &net->xfrm.policy_inexact[dir];
1125        hlist_for_each_entry(pol, chain, bydst) {
1126                if ((pol->priority >= priority) && ret)
1127                        break;
1128
1129                err = xfrm_policy_match(pol, fl, type, family, dir);
1130                if (err) {
1131                        if (err == -ESRCH)
1132                                continue;
1133                        else {
1134                                ret = ERR_PTR(err);
1135                                goto fail;
1136                        }
1137                } else {
1138                        ret = pol;
1139                        break;
1140                }
1141        }
1142
1143        xfrm_pol_hold(ret);
1144fail:
1145        read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1146
1147        return ret;
1148}
1149
1150static struct xfrm_policy *
1151__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
1152{
1153#ifdef CONFIG_XFRM_SUB_POLICY
1154        struct xfrm_policy *pol;
1155
1156        pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
1157        if (pol != NULL)
1158                return pol;
1159#endif
1160        return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
1161}
1162
1163static int flow_to_policy_dir(int dir)
1164{
1165        if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1166            XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1167            XFRM_POLICY_FWD == FLOW_DIR_FWD)
1168                return dir;
1169
1170        switch (dir) {
1171        default:
1172        case FLOW_DIR_IN:
1173                return XFRM_POLICY_IN;
1174        case FLOW_DIR_OUT:
1175                return XFRM_POLICY_OUT;
1176        case FLOW_DIR_FWD:
1177                return XFRM_POLICY_FWD;
1178        }
1179}
1180
1181static struct flow_cache_object *
1182xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1183                   u8 dir, struct flow_cache_object *old_obj, void *ctx)
1184{
1185        struct xfrm_policy *pol;
1186
1187        if (old_obj)
1188                xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1189
1190        pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1191        if (IS_ERR_OR_NULL(pol))
1192                return ERR_CAST(pol);
1193
1194        /* Resolver returns two references:
1195         * one for cache and one for caller of flow_cache_lookup() */
1196        xfrm_pol_hold(pol);
1197
1198        return &pol->flo;
1199}
1200
1201static inline int policy_to_flow_dir(int dir)
1202{
1203        if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1204            XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1205            XFRM_POLICY_FWD == FLOW_DIR_FWD)
1206                return dir;
1207        switch (dir) {
1208        default:
1209        case XFRM_POLICY_IN:
1210                return FLOW_DIR_IN;
1211        case XFRM_POLICY_OUT:
1212                return FLOW_DIR_OUT;
1213        case XFRM_POLICY_FWD:
1214                return FLOW_DIR_FWD;
1215        }
1216}
1217
1218static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
1219                                                 const struct flowi *fl)
1220{
1221        struct xfrm_policy *pol;
1222        struct net *net = sock_net(sk);
1223
1224        rcu_read_lock();
1225        read_lock_bh(&net->xfrm.xfrm_policy_lock);
1226        pol = rcu_dereference(sk->sk_policy[dir]);
1227        if (pol != NULL) {
1228                bool match = xfrm_selector_match(&pol->selector, fl,
1229                                                 sk->sk_family);
1230                int err = 0;
1231
1232                if (match) {
1233                        if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1234                                pol = NULL;
1235                                goto out;
1236                        }
1237                        err = security_xfrm_policy_lookup(pol->security,
1238                                                      fl->flowi_secid,
1239                                                      policy_to_flow_dir(dir));
1240                        if (!err)
1241                                xfrm_pol_hold(pol);
1242                        else if (err == -ESRCH)
1243                                pol = NULL;
1244                        else
1245                                pol = ERR_PTR(err);
1246                } else
1247                        pol = NULL;
1248        }
1249out:
1250        read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1251        rcu_read_unlock();
1252        return pol;
1253}
1254
1255static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1256{
1257        struct net *net = xp_net(pol);
1258
1259        list_add(&pol->walk.all, &net->xfrm.policy_all);
1260        net->xfrm.policy_count[dir]++;
1261        xfrm_pol_hold(pol);
1262}
1263
1264static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1265                                                int dir)
1266{
1267        struct net *net = xp_net(pol);
1268
1269        if (list_empty(&pol->walk.all))
1270                return NULL;
1271
1272        /* Socket policies are not hashed. */
1273        if (!hlist_unhashed(&pol->bydst)) {
1274                hlist_del(&pol->bydst);
1275                hlist_del(&pol->byidx);
1276        }
1277
1278        list_del_init(&pol->walk.all);
1279        net->xfrm.policy_count[dir]--;
1280
1281        return pol;
1282}
1283
1284static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
1285{
1286        __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
1287}
1288
1289static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
1290{
1291        __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
1292}
1293
1294int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1295{
1296        struct net *net = xp_net(pol);
1297
1298        write_lock_bh(&net->xfrm.xfrm_policy_lock);
1299        pol = __xfrm_policy_unlink(pol, dir);
1300        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1301        if (pol) {
1302                xfrm_policy_kill(pol);
1303                return 0;
1304        }
1305        return -ENOENT;
1306}
1307EXPORT_SYMBOL(xfrm_policy_delete);
1308
1309int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1310{
1311        struct net *net = xp_net(pol);
1312        struct xfrm_policy *old_pol;
1313
1314#ifdef CONFIG_XFRM_SUB_POLICY
1315        if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1316                return -EINVAL;
1317#endif
1318
1319        write_lock_bh(&net->xfrm.xfrm_policy_lock);
1320        old_pol = rcu_dereference_protected(sk->sk_policy[dir],
1321                                lockdep_is_held(&net->xfrm.xfrm_policy_lock));
1322        if (pol) {
1323                pol->curlft.add_time = get_seconds();
1324                pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
1325                xfrm_sk_policy_link(pol, dir);
1326        }
1327        rcu_assign_pointer(sk->sk_policy[dir], pol);
1328        if (old_pol) {
1329                if (pol)
1330                        xfrm_policy_requeue(old_pol, pol);
1331
1332                /* Unlinking succeeds always. This is the only function
1333                 * allowed to delete or replace socket policy.
1334                 */
1335                xfrm_sk_policy_unlink(old_pol, dir);
1336        }
1337        write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1338
1339        if (old_pol) {
1340                xfrm_policy_kill(old_pol);
1341        }
1342        return 0;
1343}
1344
1345static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1346{
1347        struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1348        struct net *net = xp_net(old);
1349
1350        if (newp) {
1351                newp->selector = old->selector;
1352                if (security_xfrm_policy_clone(old->security,
1353                                               &newp->security)) {
1354                        kfree(newp);
1355                        return NULL;  /* ENOMEM */
1356                }
1357                newp->lft = old->lft;
1358                newp->curlft = old->curlft;
1359                newp->mark = old->mark;
1360                newp->action = old->action;
1361                newp->flags = old->flags;
1362                newp->xfrm_nr = old->xfrm_nr;
1363                newp->index = old->index;
1364                newp->type = old->type;
1365                memcpy(newp->xfrm_vec, old->xfrm_vec,
1366                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1367                write_lock_bh(&net->xfrm.xfrm_policy_lock);
1368                xfrm_sk_policy_link(newp, dir);
1369                write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1370                xfrm_pol_put(newp);
1371        }
1372        return newp;
1373}
1374
1375int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
1376{
1377        const struct xfrm_policy *p;
1378        struct xfrm_policy *np;
1379        int i, ret = 0;
1380
1381        rcu_read_lock();
1382        for (i = 0; i < 2; i++) {
1383                p = rcu_dereference(osk->sk_policy[i]);
1384                if (p) {
1385                        np = clone_policy(p, i);
1386                        if (unlikely(!np)) {
1387                                ret = -ENOMEM;
1388                                break;
1389                        }
1390                        rcu_assign_pointer(sk->sk_policy[i], np);
1391                }
1392        }
1393        rcu_read_unlock();
1394        return ret;
1395}
1396
1397static int
1398xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
1399               xfrm_address_t *remote, unsigned short family)
1400{
1401        int err;
1402        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1403
1404        if (unlikely(afinfo == NULL))
1405                return -EINVAL;
1406        err = afinfo->get_saddr(net, oif, local, remote);
1407        xfrm_policy_put_afinfo(afinfo);
1408        return err;
1409}
1410
1411/* Resolve list of templates for the flow, given policy. */
1412
1413static int
1414xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1415                      struct xfrm_state **xfrm, unsigned short family)
1416{
1417        struct net *net = xp_net(policy);
1418        int nx;
1419        int i, error;
1420        xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1421        xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1422        xfrm_address_t tmp;
1423
1424        for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
1425                struct xfrm_state *x;
1426                xfrm_address_t *remote = daddr;
1427                xfrm_address_t *local  = saddr;
1428                struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1429
1430                if (tmpl->mode == XFRM_MODE_TUNNEL ||
1431                    tmpl->mode == XFRM_MODE_BEET) {
1432                        remote = &tmpl->id.daddr;
1433                        local = &tmpl->saddr;
1434                        if (xfrm_addr_any(local, tmpl->encap_family)) {
1435                                error = xfrm_get_saddr(net, fl->flowi_oif,
1436                                                       &tmp, remote,
1437                                                       tmpl->encap_family);
1438                                if (error)
1439                                        goto fail;
1440                                local = &tmp;
1441                        }
1442                }
1443
1444                x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1445
1446                if (x && x->km.state == XFRM_STATE_VALID) {
1447                        xfrm[nx++] = x;
1448                        daddr = remote;
1449                        saddr = local;
1450                        continue;
1451                }
1452                if (x) {
1453                        error = (x->km.state == XFRM_STATE_ERROR ?
1454                                 -EINVAL : -EAGAIN);
1455                        xfrm_state_put(x);
1456                } else if (error == -ESRCH) {
1457                        error = -EAGAIN;
1458                }
1459
1460                if (!tmpl->optional)
1461                        goto fail;
1462        }
1463        return nx;
1464
1465fail:
1466        for (nx--; nx >= 0; nx--)
1467                xfrm_state_put(xfrm[nx]);
1468        return error;
1469}
1470
1471static int
1472xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
1473                  struct xfrm_state **xfrm, unsigned short family)
1474{
1475        struct xfrm_state *tp[XFRM_MAX_DEPTH];
1476        struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1477        int cnx = 0;
1478        int error;
1479        int ret;
1480        int i;
1481
1482        for (i = 0; i < npols; i++) {
1483                if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1484                        error = -ENOBUFS;
1485                        goto fail;
1486                }
1487
1488                ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1489                if (ret < 0) {
1490                        error = ret;
1491                        goto fail;
1492                } else
1493                        cnx += ret;
1494        }
1495
1496        /* found states are sorted for outbound processing */
1497        if (npols > 1)
1498                xfrm_state_sort(xfrm, tpp, cnx, family);
1499
1500        return cnx;
1501
1502 fail:
1503        for (cnx--; cnx >= 0; cnx--)
1504                xfrm_state_put(tpp[cnx]);
1505        return error;
1506
1507}
1508
1509/* Check that the bundle accepts the flow and its components are
1510 * still valid.
1511 */
1512
1513static inline int xfrm_get_tos(const struct flowi *fl, int family)
1514{
1515        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1516        int tos;
1517
1518        if (!afinfo)
1519                return -EINVAL;
1520
1521        tos = afinfo->get_tos(fl);
1522
1523        xfrm_policy_put_afinfo(afinfo);
1524
1525        return tos;
1526}
1527
1528static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
1529{
1530        struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1531        struct dst_entry *dst = &xdst->u.dst;
1532
1533        if (xdst->route == NULL) {
1534                /* Dummy bundle - if it has xfrms we were not
1535                 * able to build bundle as template resolution failed.
1536                 * It means we need to try again resolving. */
1537                if (xdst->num_xfrms > 0)
1538                        return NULL;
1539        } else if (dst->flags & DST_XFRM_QUEUE) {
1540                return NULL;
1541        } else {
1542                /* Real bundle */
1543                if (stale_bundle(dst))
1544                        return NULL;
1545        }
1546
1547        dst_hold(dst);
1548        return flo;
1549}
1550
1551static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1552{
1553        struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1554        struct dst_entry *dst = &xdst->u.dst;
1555
1556        if (!xdst->route)
1557                return 0;
1558        if (stale_bundle(dst))
1559                return 0;
1560
1561        return 1;
1562}
1563
1564static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1565{
1566        struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1567        struct dst_entry *dst = &xdst->u.dst;
1568
1569        dst_free(dst);
1570}
1571
1572static const struct flow_cache_ops xfrm_bundle_fc_ops = {
1573        .get = xfrm_bundle_flo_get,
1574        .check = xfrm_bundle_flo_check,
1575        .delete = xfrm_bundle_flo_delete,
1576};
1577
1578static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1579{
1580        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1581        struct dst_ops *dst_ops;
1582        struct xfrm_dst *xdst;
1583
1584        if (!afinfo)
1585                return ERR_PTR(-EINVAL);
1586
1587        switch (family) {
1588        case AF_INET:
1589                dst_ops = &net->xfrm.xfrm4_dst_ops;
1590                break;
1591#if IS_ENABLED(CONFIG_IPV6)
1592        case AF_INET6:
1593                dst_ops = &net->xfrm.xfrm6_dst_ops;
1594                break;
1595#endif
1596        default:
1597                BUG();
1598        }
1599        xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
1600
1601        if (likely(xdst)) {
1602                struct dst_entry *dst = &xdst->u.dst;
1603
1604                memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1605                xdst->flo.ops = &xfrm_bundle_fc_ops;
1606        } else
1607                xdst = ERR_PTR(-ENOBUFS);
1608
1609        xfrm_policy_put_afinfo(afinfo);
1610
1611        return xdst;
1612}
1613
1614static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1615                                 int nfheader_len)
1616{
1617        struct xfrm_policy_afinfo *afinfo =
1618                xfrm_policy_get_afinfo(dst->ops->family);
1619        int err;
1620
1621        if (!afinfo)
1622                return -EINVAL;
1623
1624        err = afinfo->init_path(path, dst, nfheader_len);
1625
1626        xfrm_policy_put_afinfo(afinfo);
1627
1628        return err;
1629}
1630
1631static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1632                                const struct flowi *fl)
1633{
1634        struct xfrm_policy_afinfo *afinfo =
1635                xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1636        int err;
1637
1638        if (!afinfo)
1639                return -EINVAL;
1640
1641        err = afinfo->fill_dst(xdst, dev, fl);
1642
1643        xfrm_policy_put_afinfo(afinfo);
1644
1645        return err;
1646}
1647
1648
1649/* Allocate chain of dst_entry's, attach known xfrm's, calculate
1650 * all the metrics... Shortly, bundle a bundle.
1651 */
1652
1653static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1654                                            struct xfrm_state **xfrm, int nx,
1655                                            const struct flowi *fl,
1656                                            struct dst_entry *dst)
1657{
1658        struct net *net = xp_net(policy);
1659        unsigned long now = jiffies;
1660        struct net_device *dev;
1661        struct xfrm_mode *inner_mode;
1662        struct dst_entry *dst_prev = NULL;
1663        struct dst_entry *dst0 = NULL;
1664        int i = 0;
1665        int err;
1666        int header_len = 0;
1667        int nfheader_len = 0;
1668        int trailer_len = 0;
1669        int tos;
1670        int family = policy->selector.family;
1671        xfrm_address_t saddr, daddr;
1672
1673        xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1674
1675        tos = xfrm_get_tos(fl, family);
1676        err = tos;
1677        if (tos < 0)
1678                goto put_states;
1679
1680        dst_hold(dst);
1681
1682        for (; i < nx; i++) {
1683                struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1684                struct dst_entry *dst1 = &xdst->u.dst;
1685
1686                err = PTR_ERR(xdst);
1687                if (IS_ERR(xdst)) {
1688                        dst_release(dst);
1689                        goto put_states;
1690                }
1691
1692                if (xfrm[i]->sel.family == AF_UNSPEC) {
1693                        inner_mode = xfrm_ip2inner_mode(xfrm[i],
1694                                                        xfrm_af2proto(family));
1695                        if (!inner_mode) {
1696                                err = -EAFNOSUPPORT;
1697                                dst_release(dst);
1698                                goto put_states;
1699                        }
1700                } else
1701                        inner_mode = xfrm[i]->inner_mode;
1702
1703                if (!dst_prev)
1704                        dst0 = dst1;
1705                else {
1706                        dst_prev->child = dst_clone(dst1);
1707                        dst1->flags |= DST_NOHASH;
1708                }
1709
1710                xdst->route = dst;
1711                dst_copy_metrics(dst1, dst);
1712
1713                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1714                        family = xfrm[i]->props.family;
1715                        dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
1716                                              &saddr, &daddr, family);
1717                        err = PTR_ERR(dst);
1718                        if (IS_ERR(dst))
1719                                goto put_states;
1720                } else
1721                        dst_hold(dst);
1722
1723                dst1->xfrm = xfrm[i];
1724                xdst->xfrm_genid = xfrm[i]->genid;
1725
1726                dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1727                dst1->flags |= DST_HOST;
1728                dst1->lastuse = now;
1729
1730                dst1->input = dst_discard;
1731                dst1->output = inner_mode->afinfo->output;
1732
1733                dst1->next = dst_prev;
1734                dst_prev = dst1;
1735
1736                header_len += xfrm[i]->props.header_len;
1737                if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1738                        nfheader_len += xfrm[i]->props.header_len;
1739                trailer_len += xfrm[i]->props.trailer_len;
1740        }
1741
1742        dst_prev->child = dst;
1743        dst0->path = dst;
1744
1745        err = -ENODEV;
1746        dev = dst->dev;
1747        if (!dev)
1748                goto free_dst;
1749
1750        xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1751        xfrm_init_pmtu(dst_prev);
1752
1753        for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1754                struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1755
1756                err = xfrm_fill_dst(xdst, dev, fl);
1757                if (err)
1758                        goto free_dst;
1759
1760                dst_prev->header_len = header_len;
1761                dst_prev->trailer_len = trailer_len;
1762                header_len -= xdst->u.dst.xfrm->props.header_len;
1763                trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1764        }
1765
1766out:
1767        return dst0;
1768
1769put_states:
1770        for (; i < nx; i++)
1771                xfrm_state_put(xfrm[i]);
1772free_dst:
1773        if (dst0)
1774                dst_free(dst0);
1775        dst0 = ERR_PTR(err);
1776        goto out;
1777}
1778
#ifdef CONFIG_XFRM_SUB_POLICY
/* Copy @size bytes from @src into the buffer *@target points at.
 *
 * If *@target is still NULL a buffer of @size bytes is allocated first
 * with kmalloc(GFP_ATOMIC) and stored in *@target; an existing buffer is
 * reused as is.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	void *buf = *target;

	if (buf == NULL) {
		buf = kmalloc(size, GFP_ATOMIC);
		if (buf == NULL)
			return -ENOMEM;
		*target = buf;
	}

	memcpy(buf, src, size);
	return 0;
}
#endif
1792
/* Store a copy of selector @sel in the ->partner slot of bundle @dst.
 *
 * Only does work when CONFIG_XFRM_SUB_POLICY is enabled; otherwise it is a
 * no-op returning 0.  With sub-policies it returns whatever
 * xfrm_dst_alloc_copy() returns (0 or -ENOMEM).
 */
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;
	void **slot = (void **)&(bundle->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
	return 0;
#endif
}
1804
/* Store a copy of flow @fl in the ->origin slot of bundle @dst.
 *
 * Only does work when CONFIG_XFRM_SUB_POLICY is enabled; otherwise it is a
 * no-op returning 0.  With sub-policies it returns whatever
 * xfrm_dst_alloc_copy() returns (0 or -ENOMEM).
 */
static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;
	void **slot = (void **)&(bundle->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
	return 0;
#endif
}
1815
1816static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1817                                struct xfrm_policy **pols,
1818                                int *num_pols, int *num_xfrms)
1819{
1820        int i;
1821
1822        if (*num_pols == 0 || !pols[0]) {
1823                *num_pols = 0;
1824                *num_xfrms = 0;
1825                return 0;
1826        }
1827        if (IS_ERR(pols[0]))
1828                return PTR_ERR(pols[0]);
1829
1830        *num_xfrms = pols[0]->xfrm_nr;
1831
1832#ifdef CONFIG_XFRM_SUB_POLICY
1833        if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
1834            pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1835                pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
1836                                                    XFRM_POLICY_TYPE_MAIN,
1837                                                    fl, family,
1838                                                    XFRM_POLICY_OUT);
1839                if (pols[1]) {
1840                        if (IS_ERR(pols[1])) {
1841                                xfrm_pols_put(pols, *num_pols);
1842                                return PTR_ERR(pols[1]);
1843                        }
1844                        (*num_pols)++;
1845                        (*num_xfrms) += pols[1]->xfrm_nr;
1846                }
1847        }
1848#endif
1849        for (i = 0; i < *num_pols; i++) {
1850                if (pols[i]->action != XFRM_POLICY_ALLOW) {
1851                        *num_xfrms = -1;
1852                        break;
1853                }
1854        }
1855
1856        return 0;
1857
1858}
1859
1860static struct xfrm_dst *
1861xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1862                               const struct flowi *fl, u16 family,
1863                               struct dst_entry *dst_orig)
1864{
1865        struct net *net = xp_net(pols[0]);
1866        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1867        struct dst_entry *dst;
1868        struct xfrm_dst *xdst;
1869        int err;
1870
1871        /* Try to instantiate a bundle */
1872        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
1873        if (err <= 0) {
1874                if (err != 0 && err != -EAGAIN)
1875                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1876                return ERR_PTR(err);
1877        }
1878
1879        dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
1880        if (IS_ERR(dst)) {
1881                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1882                return ERR_CAST(dst);
1883        }
1884
1885        xdst = (struct xfrm_dst *)dst;
1886        xdst->num_xfrms = err;
1887        if (num_pols > 1)
1888                err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1889        else
1890                err = xfrm_dst_update_origin(dst, fl);
1891        if (unlikely(err)) {
1892                dst_free(dst);
1893                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1894                return ERR_PTR(err);
1895        }
1896
1897        xdst->num_pols = num_pols;
1898        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
1899        xdst->policy_genid = atomic_read(&pols[0]->genid);
1900
1901        return xdst;
1902}
1903
1904static void xfrm_policy_queue_process(unsigned long arg)
1905{
1906        struct sk_buff *skb;
1907        struct sock *sk;
1908        struct dst_entry *dst;
1909        struct xfrm_policy *pol = (struct xfrm_policy *)arg;
1910        struct net *net = xp_net(pol);
1911        struct xfrm_policy_queue *pq = &pol->polq;
1912        struct flowi fl;
1913        struct sk_buff_head list;
1914
1915        spin_lock(&pq->hold_queue.lock);
1916        skb = skb_peek(&pq->hold_queue);
1917        if (!skb) {
1918                spin_unlock(&pq->hold_queue.lock);
1919                goto out;
1920        }
1921        dst = skb_dst(skb);
1922        sk = skb->sk;
1923        xfrm_decode_session(skb, &fl, dst->ops->family);
1924        spin_unlock(&pq->hold_queue.lock);
1925
1926        dst_hold(dst->path);
1927        dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
1928        if (IS_ERR(dst))
1929                goto purge_queue;
1930
1931        if (dst->flags & DST_XFRM_QUEUE) {
1932                dst_release(dst);
1933
1934                if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
1935                        goto purge_queue;
1936
1937                pq->timeout = pq->timeout << 1;
1938                if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
1939                        xfrm_pol_hold(pol);
1940        goto out;
1941        }
1942
1943        dst_release(dst);
1944
1945        __skb_queue_head_init(&list);
1946
1947        spin_lock(&pq->hold_queue.lock);
1948        pq->timeout = 0;
1949        skb_queue_splice_init(&pq->hold_queue, &list);
1950        spin_unlock(&pq->hold_queue.lock);
1951
1952        while (!skb_queue_empty(&list)) {
1953                skb = __skb_dequeue(&list);
1954
1955                xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
1956                dst_hold(skb_dst(skb)->path);
1957                dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
1958                if (IS_ERR(dst)) {
1959                        kfree_skb(skb);
1960                        continue;
1961                }
1962
1963                nf_reset(skb);
1964                skb_dst_drop(skb);
1965                skb_dst_set(skb, dst);
1966
1967                dst_output(net, skb->sk, skb);
1968        }
1969
1970out:
1971        xfrm_pol_put(pol);
1972        return;
1973
1974purge_queue:
1975        pq->timeout = 0;
1976        skb_queue_purge(&pq->hold_queue);
1977        xfrm_pol_put(pol);
1978}
1979
1980static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1981{
1982        unsigned long sched_next;
1983        struct dst_entry *dst = skb_dst(skb);
1984        struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
1985        struct xfrm_policy *pol = xdst->pols[0];
1986        struct xfrm_policy_queue *pq = &pol->polq;
1987
1988        if (unlikely(skb_fclone_busy(sk, skb))) {
1989                kfree_skb(skb);
1990                return 0;
1991        }
1992
1993        if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
1994                kfree_skb(skb);
1995                return -EAGAIN;
1996        }
1997
1998        skb_dst_force(skb);
1999
2000        spin_lock_bh(&pq->hold_queue.lock);
2001
2002        if (!pq->timeout)
2003                pq->timeout = XFRM_QUEUE_TMO_MIN;
2004
2005        sched_next = jiffies + pq->timeout;
2006
2007        if (del_timer(&pq->hold_timer)) {
2008                if (time_before(pq->hold_timer.expires, sched_next))
2009                        sched_next = pq->hold_timer.expires;
2010                xfrm_pol_put(pol);
2011        }
2012
2013        __skb_queue_tail(&pq->hold_queue, skb);
2014        if (!mod_timer(&pq->hold_timer, sched_next))
2015                xfrm_pol_hold(pol);
2016
2017        spin_unlock_bh(&pq->hold_queue.lock);
2018
2019        return 0;
2020}
2021
2022static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
2023                                                 struct xfrm_flo *xflo,
2024                                                 const struct flowi *fl,
2025                                                 int num_xfrms,
2026                                                 u16 family)
2027{
2028        int err;
2029        struct net_device *dev;
2030        struct dst_entry *dst;
2031        struct dst_entry *dst1;
2032        struct xfrm_dst *xdst;
2033
2034        xdst = xfrm_alloc_dst(net, family);
2035        if (IS_ERR(xdst))
2036                return xdst;
2037
2038        if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
2039            net->xfrm.sysctl_larval_drop ||
2040            num_xfrms <= 0)
2041                return xdst;
2042
2043        dst = xflo->dst_orig;
2044        dst1 = &xdst->u.dst;
2045        dst_hold(dst);
2046        xdst->route = dst;
2047
2048        dst_copy_metrics(dst1, dst);
2049
2050        dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
2051        dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
2052        dst1->lastuse = jiffies;
2053
2054        dst1->input = dst_discard;
2055        dst1->output = xdst_queue_output;
2056
2057        dst_hold(dst);
2058        dst1->child = dst;
2059        dst1->path = dst;
2060
2061        xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
2062
2063        err = -ENODEV;
2064        dev = dst->dev;
2065        if (!dev)
2066                goto free_dst;
2067
2068        err = xfrm_fill_dst(xdst, dev, fl);
2069        if (err)
2070                goto free_dst;
2071
2072out:
2073        return xdst;
2074
2075free_dst:
2076        dst_release(dst1);
2077        xdst = ERR_PTR(err);
2078        goto out;
2079}
2080
2081static struct flow_cache_object *
2082xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
2083                   struct flow_cache_object *oldflo, void *ctx)
2084{
2085        struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
2086        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2087        struct xfrm_dst *xdst, *new_xdst;
2088        int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
2089
2090        /* Check if the policies from old bundle are usable */
2091        xdst = NULL;
2092        if (oldflo) {
2093                xdst = container_of(oldflo, struct xfrm_dst, flo);
2094                num_pols = xdst->num_pols;
2095                num_xfrms = xdst->num_xfrms;
2096                pol_dead = 0;
2097                for (i = 0; i < num_pols; i++) {
2098                        pols[i] = xdst->pols[i];
2099                        pol_dead |= pols[i]->walk.dead;
2100                }
2101                if (pol_dead) {
2102                        dst_free(&xdst->u.dst);
2103                        xdst = NULL;
2104                        num_pols = 0;
2105                        num_xfrms = 0;
2106                        oldflo = NULL;
2107                }
2108        }
2109
2110        /* Resolve policies to use if we couldn't get them from
2111         * previous cache entry */
2112        if (xdst == NULL) {
2113                num_pols = 1;
2114                pols[0] = __xfrm_policy_lookup(net, fl, family,
2115                                               flow_to_policy_dir(dir));
2116                err = xfrm_expand_policies(fl, family, pols,
2117                                           &num_pols, &num_xfrms);
2118                if (err < 0)
2119                        goto inc_error;
2120                if (num_pols == 0)
2121                        return NULL;
2122                if (num_xfrms <= 0)
2123                        goto make_dummy_bundle;
2124        }
2125
2126        new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
2127                                                  xflo->dst_orig);
2128        if (IS_ERR(new_xdst)) {
2129                err = PTR_ERR(new_xdst);
2130                if (err != -EAGAIN)
2131                        goto error;
2132                if (oldflo == NULL)
2133                        goto make_dummy_bundle;
2134                dst_hold(&xdst->u.dst);
2135                return oldflo;
2136        } else if (new_xdst == NULL) {
2137                num_xfrms = 0;
2138                if (oldflo == NULL)
2139                        goto make_dummy_bundle;
2140                xdst->num_xfrms = 0;
2141                dst_hold(&xdst->u.dst);
2142                return oldflo;
2143        }
2144
2145        /* Kill the previous bundle */
2146        if (xdst) {
2147                /* The policies were stolen for newly generated bundle */
2148                xdst->num_pols = 0;
2149                dst_free(&xdst->u.dst);
2150        }
2151
2152        /* Flow cache does not have reference, it dst_free()'s,
2153         * but we do need to return one reference for original caller */
2154        dst_hold(&new_xdst->u.dst);
2155        return &new_xdst->flo;
2156
2157make_dummy_bundle:
2158        /* We found policies, but there's no bundles to instantiate:
2159         * either because the policy blocks, has no transformations or
2160         * we could not build template (no xfrm_states).*/
2161        xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
2162        if (IS_ERR(xdst)) {
2163                xfrm_pols_put(pols, num_pols);
2164                return ERR_CAST(xdst);
2165        }
2166        xdst->num_pols = num_pols;
2167        xdst->num_xfrms = num_xfrms;
2168        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2169
2170        dst_hold(&xdst->u.dst);
2171        return &xdst->flo;
2172
2173inc_error:
2174        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
2175error:
2176        if (xdst != NULL)
2177                dst_free(&xdst->u.dst);
2178        else
2179                xfrm_pols_put(pols, num_pols);
2180        return ERR_PTR(err);
2181}
2182
2183static struct dst_entry *make_blackhole(struct net *net, u16 family,
2184                                        struct dst_entry *dst_orig)
2185{
2186        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2187        struct dst_entry *ret;
2188
2189        if (!afinfo) {
2190                dst_release(dst_orig);
2191                return ERR_PTR(-EINVAL);
2192        } else {
2193                ret = afinfo->blackhole_route(net, dst_orig);
2194        }
2195        xfrm_policy_put_afinfo(afinfo);
2196
2197        return ret;
2198}
2199
2200/* Main function: finds/creates a bundle for given flow.
2201 *
2202 * At the moment we eat a raw IP route. Mostly to speed up lookups
2203 * on interfaces with disabled IPsec.
2204 */
2205struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
2206                              const struct flowi *fl,
2207                              const struct sock *sk, int flags)
2208{
2209        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2210        struct flow_cache_object *flo;
2211        struct xfrm_dst *xdst;
2212        struct dst_entry *dst, *route;
2213        u16 family = dst_orig->ops->family;
2214        u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
2215        int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
2216
2217        dst = NULL;
2218        xdst = NULL;
2219        route = NULL;
2220
2221        sk = sk_const_to_full_sk(sk);
2222        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
2223                num_pols = 1;
2224                pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
2225                err = xfrm_expand_policies(fl, family, pols,
2226                                           &num_pols, &num_xfrms);
2227                if (err < 0)
2228                        goto dropdst;
2229
2230                if (num_pols) {
2231                        if (num_xfrms <= 0) {
2232                                drop_pols = num_pols;
2233                                goto no_transform;
2234                        }
2235
2236                        xdst = xfrm_resolve_and_create_bundle(
2237                                        pols, num_pols, fl,
2238                                        family, dst_orig);
2239                        if (IS_ERR(xdst)) {
2240                                xfrm_pols_put(pols, num_pols);
2241                                err = PTR_ERR(xdst);
2242                                goto dropdst;
2243                        } else if (xdst == NULL) {
2244                                num_xfrms = 0;
2245                                drop_pols = num_pols;
2246                                goto no_transform;
2247                        }
2248
2249                        dst_hold(&xdst->u.dst);
2250                        xdst->u.dst.flags |= DST_NOCACHE;
2251                        route = xdst->route;
2252                }
2253        }
2254
2255        if (xdst == NULL) {
2256                struct xfrm_flo xflo;
2257
2258                xflo.dst_orig = dst_orig;
2259                xflo.flags = flags;
2260
2261                /* To accelerate a bit...  */
2262                if ((dst_orig->flags & DST_NOXFRM) ||
2263                    !net->xfrm.policy_count[XFRM_POLICY_OUT])
2264                        goto nopol;
2265
2266                flo = flow_cache_lookup(net, fl, family, dir,
2267                                        xfrm_bundle_lookup, &xflo);
2268                if (flo == NULL)
2269                        goto nopol;
2270                if (IS_ERR(flo)) {
2271                        err = PTR_ERR(flo);
2272                        goto dropdst;
2273                }
2274                xdst = container_of(flo, struct xfrm_dst, flo);
2275
2276                num_pols = xdst->num_pols;
2277                num_xfrms = xdst->num_xfrms;
2278                memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
2279                route = xdst->route;
2280        }
2281
2282        dst = &xdst->u.dst;
2283        if (route == NULL && num_xfrms > 0) {
2284                /* The only case when xfrm_bundle_lookup() returns a
2285                 * bundle with null route, is when the template could
2286                 * not be resolved. It means policies are there, but
2287                 * bundle could not be created, since we don't yet
2288                 * have the xfrm_state's. We need to wait for KM to
2289                 * negotiate new SA's or bail out with error.*/
2290                if (net->xfrm.sysctl_larval_drop) {
2291                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2292                        err = -EREMOTE;
2293                        goto error;
2294                }
2295
2296                err = -EAGAIN;
2297
2298                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2299                goto error;
2300        }
2301
2302no_transform:
2303        if (num_pols == 0)
2304                goto nopol;
2305
2306        if ((flags & XFRM_LOOKUP_ICMP) &&
2307            !(pols[0]->flags & XFRM_POLICY_ICMP)) {
2308                err = -ENOENT;
2309                goto error;
2310        }
2311
2312        for (i = 0; i < num_pols; i++)
2313                pols[i]->curlft.use_time = get_seconds();
2314
2315        if (num_xfrms < 0) {
2316                /* Prohibit the flow */
2317                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
2318                err = -EPERM;
2319                goto error;
2320        } else if (num_xfrms > 0) {
2321                /* Flow transformed */
2322                dst_release(dst_orig);
2323        } else {
2324                /* Flow passes untransformed */
2325                dst_release(dst);
2326                dst = dst_orig;
2327        }
2328ok:
2329        xfrm_pols_put(pols, drop_pols);
2330        if (dst && dst->xfrm &&
2331            dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
2332                dst->flags |= DST_XFRM_TUNNEL;
2333        return dst;
2334
2335nopol:
2336        if (!(flags & XFRM_LOOKUP_ICMP)) {
2337                dst = dst_orig;
2338                goto ok;
2339        }
2340        err = -ENOENT;
2341error:
2342        dst_release(dst);
2343dropdst:
2344        if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
2345                dst_release(dst_orig);
2346        xfrm_pols_put(pols, drop_pols);
2347        return ERR_PTR(err);
2348}
2349EXPORT_SYMBOL(xfrm_lookup);
2350
2351/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
2352 * Otherwise we may send out blackholed packets.
2353 */
2354struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
2355                                    const struct flowi *fl,
2356                                    const struct sock *sk, int flags)
2357{
2358        struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
2359                                            flags | XFRM_LOOKUP_QUEUE |
2360                                            XFRM_LOOKUP_KEEP_DST_REF);
2361
2362        if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
2363                return make_blackhole(net, dst_orig->ops->family, dst_orig);
2364
2365        return dst;
2366}
2367EXPORT_SYMBOL(xfrm_lookup_route);
2368
2369static inline int
2370xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2371{
2372        struct xfrm_state *x;
2373
2374        if (!skb->sp || idx < 0 || idx >= skb->sp->len)
2375                return 0;
2376        x = skb->sp->xvec[idx];
2377        if (!x->type->reject)
2378                return 0;
2379        return x->type->reject(x, skb, fl);
2380}
2381
2382/* When skb is transformed back to its "native" form, we have to
2383 * check policy restrictions. At the moment we make this in maximally
2384 * stupid way. Shame on me. :-) Of course, connected sockets must
2385 * have policy cached at them.
2386 */
2387
2388static inline int
2389xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
2390              unsigned short family)
2391{
2392        if (xfrm_state_kern(x))
2393                return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
2394        return  x->id.proto == tmpl->id.proto &&
2395                (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
2396                (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
2397                x->props.mode == tmpl->mode &&
2398                (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2399                 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2400                !(x->props.mode != XFRM_MODE_TRANSPORT &&
2401                  xfrm_state_addr_cmp(tmpl, x, family));
2402}
2403
2404/*
2405 * 0 or more than 0 is returned when validation is succeeded (either bypass
2406 * because of optional transport mode, or next index of the mathced secpath
2407 * state with the template.
2408 * -1 is returned when no matching template is found.
2409 * Otherwise "-2 - errored_index" is returned.
2410 */
2411static inline int
2412xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
2413               unsigned short family)
2414{
2415        int idx = start;
2416
2417        if (tmpl->optional) {
2418                if (tmpl->mode == XFRM_MODE_TRANSPORT)
2419                        return start;
2420        } else
2421                start = -1;
2422        for (; idx < sp->len; idx++) {
2423                if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
2424                        return ++idx;
2425                if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
2426                        if (start == -1)
2427                                start = -2-idx;
2428                        break;
2429                }
2430        }
2431        return start;
2432}
2433
2434int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
2435                          unsigned int family, int reverse)
2436{
2437        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2438        int err;
2439
2440        if (unlikely(afinfo == NULL))
2441                return -EAFNOSUPPORT;
2442
2443        afinfo->decode_session(skb, fl, reverse);
2444        err = security_xfrm_decode_session(skb, &fl->flowi_secid);
2445        xfrm_policy_put_afinfo(afinfo);
2446        return err;
2447}
2448EXPORT_SYMBOL(__xfrm_decode_session);
2449
2450static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
2451{
2452        for (; k < sp->len; k++) {
2453                if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2454                        *idxp = k;
2455                        return 1;
2456                }
2457        }
2458
2459        return 0;
2460}
2461
2462int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
2463                        unsigned short family)
2464{
2465        struct net *net = dev_net(skb->dev);
2466        struct xfrm_policy *pol;
2467        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2468        int npols = 0;
2469        int xfrm_nr;
2470        int pi;
2471        int reverse;
2472        struct flowi fl;
2473        u8 fl_dir;
2474        int xerr_idx = -1;
2475
2476        reverse = dir & ~XFRM_POLICY_MASK;
2477        dir &= XFRM_POLICY_MASK;
2478        fl_dir = policy_to_flow_dir(dir);
2479
2480        if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
2481                XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
2482                return 0;
2483        }
2484
2485        nf_nat_decode_session(skb, &fl, family);
2486
2487        /* First, check used SA against their selectors. */
2488        if (skb->sp) {
2489                int i;
2490
2491                for (i = skb->sp->len-1; i >= 0; i--) {
2492                        struct xfrm_state *x = skb->sp->xvec[i];
2493                        if (!xfrm_selector_match(&x->sel, &fl, family)) {
2494                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
2495                                return 0;
2496                        }
2497                }
2498        }
2499
2500        pol = NULL;
2501        sk = sk_to_full_sk(sk);
2502        if (sk && sk->sk_policy[dir]) {
2503                pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2504                if (IS_ERR(pol)) {
2505                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2506                        return 0;
2507                }
2508        }
2509
2510        if (!pol) {
2511                struct flow_cache_object *flo;
2512
2513                flo = flow_cache_lookup(net, &fl, family, fl_dir,
2514                                        xfrm_policy_lookup, NULL);
2515                if (IS_ERR_OR_NULL(flo))
2516                        pol = ERR_CAST(flo);
2517                else
2518                        pol = container_of(flo, struct xfrm_policy, flo);
2519        }
2520
2521        if (IS_ERR(pol)) {
2522                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2523                return 0;
2524        }
2525
2526        if (!pol) {
2527                if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2528                        xfrm_secpath_reject(xerr_idx, skb, &fl);
2529                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2530                        return 0;
2531                }
2532                return 1;
2533        }
2534
2535        pol->curlft.use_time = get_seconds();
2536
2537        pols[0] = pol;
2538        npols++;
2539#ifdef CONFIG_XFRM_SUB_POLICY
2540        if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2541                pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2542                                                    &fl, family,
2543                                                    XFRM_POLICY_IN);
2544                if (pols[1]) {
2545                        if (IS_ERR(pols[1])) {
2546                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2547                                return 0;
2548                        }
2549                        pols[1]->curlft.use_time = get_seconds();
2550                        npols++;
2551                }
2552        }
2553#endif
2554
2555        if (pol->action == XFRM_POLICY_ALLOW) {
2556                struct sec_path *sp;
2557                static struct sec_path dummy;
2558                struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2559                struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2560                struct xfrm_tmpl **tpp = tp;
2561                int ti = 0;
2562                int i, k;
2563
2564                if ((sp = skb->sp) == NULL)
2565                        sp = &dummy;
2566
2567                for (pi = 0; pi < npols; pi++) {
2568                        if (pols[pi] != pol &&
2569                            pols[pi]->action != XFRM_POLICY_ALLOW) {
2570                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2571                                goto reject;
2572                        }
2573                        if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
2574                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2575                                goto reject_error;
2576                        }
2577                        for (i = 0; i < pols[pi]->xfrm_nr; i++)
2578                                tpp[ti++] = &pols[pi]->xfrm_vec[i];
2579                }
2580                xfrm_nr = ti;
2581                if (npols > 1) {
2582                        xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
2583                        tpp = stp;
2584                }
2585
2586                /* For each tunnel xfrm, find the first matching tmpl.
2587                 * For each tmpl before that, find corresponding xfrm.
2588                 * Order is _important_. Later we will implement
2589                 * some barriers, but at the moment barriers
2590                 * are implied between each two transformations.
2591                 */
2592                for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2593                        k = xfrm_policy_ok(tpp[i], sp, k, family);
2594                        if (k < 0) {
2595                                if (k < -1)
2596                                        /* "-2 - errored_index" returned */
2597                                        xerr_idx = -(2+k);
2598                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2599                                goto reject;
2600                        }
2601                }
2602
2603                if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2604                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2605                        goto reject;
2606                }
2607
2608                xfrm_pols_put(pols, npols);
2609                return 1;
2610        }
2611        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2612
2613reject:
2614        xfrm_secpath_reject(xerr_idx, skb, &fl);
2615reject_error:
2616        xfrm_pols_put(pols, npols);
2617        return 0;
2618}
2619EXPORT_SYMBOL(__xfrm_policy_check);
2620
2621int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2622{
2623        struct net *net = dev_net(skb->dev);
2624        struct flowi fl;
2625        struct dst_entry *dst;
2626        int res = 1;
2627
2628        if (xfrm_decode_session(skb, &fl, family) < 0) {
2629                XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
2630                return 0;
2631        }
2632
2633        skb_dst_force(skb);
2634
2635        dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
2636        if (IS_ERR(dst)) {
2637                res = 0;
2638                dst = NULL;
2639        }
2640        skb_dst_set(skb, dst);
2641        return res;
2642}
2643EXPORT_SYMBOL(__xfrm_route_forward);
2644
2645/* Optimize later using cookies and generation ids. */
2646
2647static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2648{
2649        /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2650         * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
2651         * get validated by dst_ops->check on every use.  We do this
2652         * because when a normal route referenced by an XFRM dst is
2653         * obsoleted we do not go looking around for all parent
2654         * referencing XFRM dsts so that we can invalidate them.  It
2655         * is just too much work.  Instead we make the checks here on
2656         * every use.  For example:
2657         *
2658         *      XFRM dst A --> IPv4 dst X
2659         *
2660         * X is the "xdst->route" of A (X is also the "dst->path" of A
2661         * in this example).  If X is marked obsolete, "A" will not
2662         * notice.  That's what we are validating here via the
2663         * stale_bundle() check.
2664         *
2665         * When a policy's bundle is pruned, we dst_free() the XFRM
2666         * dst which causes it's ->obsolete field to be set to
2667         * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
2668         * this, we want to force a new route lookup.
2669         */
2670        if (dst->obsolete < 0 && !stale_bundle(dst))
2671                return dst;
2672
2673        return NULL;
2674}
2675
/* Returns 1 when xfrm_bundle_ok() rejects the bundle headed by @dst, else 0. */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	return xfrm_bundle_ok(xdst) ? 0 : 1;
}
2680
2681void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2682{
2683        while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2684                dst->dev = dev_net(dev)->loopback_dev;
2685                dev_hold(dst->dev);
2686                dev_put(dev);
2687        }
2688}
2689EXPORT_SYMBOL(xfrm_dst_ifdown);
2690
/*
 * Intentionally empty: this should be impossible, since such a dst must
 * have been popped before the point of failure is reached.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
}
2695
2696static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2697{
2698        if (dst) {
2699                if (dst->obsolete) {
2700                        dst_release(dst);
2701                        dst = NULL;
2702                }
2703        }
2704        return dst;
2705}
2706
/* Flush the flow cache of @net via flow_cache_flush(). */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2712
/*
 * Deferred variant of xfrm_garbage_collect(): hands the flush of @net's
 * flow cache to flow_cache_flush_deferred().
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}
2717
2718static void xfrm_init_pmtu(struct dst_entry *dst)
2719{
2720        do {
2721                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2722                u32 pmtu, route_mtu_cached;
2723
2724                pmtu = dst_mtu(dst->child);
2725                xdst->child_mtu_cached = pmtu;
2726
2727                pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2728
2729                route_mtu_cached = dst_mtu(xdst->route);
2730                xdst->route_mtu_cached = route_mtu_cached;
2731
2732                if (pmtu > route_mtu_cached)
2733                        pmtu = route_mtu_cached;
2734
2735                dst_metric_set(dst, RTAX_MTU, pmtu);
2736        } while ((dst = dst->next));
2737}
2738
2739/* Check that the bundle accepts the flow and its components are
2740 * still valid.
2741 */
2742
2743static int xfrm_bundle_ok(struct xfrm_dst *first)
2744{
2745        struct dst_entry *dst = &first->u.dst;
2746        struct xfrm_dst *last;
2747        u32 mtu;
2748
2749        if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2750            (dst->dev && !netif_running(dst->dev)))
2751                return 0;
2752
2753        if (dst->flags & DST_XFRM_QUEUE)
2754                return 1;
2755
2756        last = NULL;
2757
2758        do {
2759                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2760
2761                if (dst->xfrm->km.state != XFRM_STATE_VALID)
2762                        return 0;
2763                if (xdst->xfrm_genid != dst->xfrm->genid)
2764                        return 0;
2765                if (xdst->num_pols > 0 &&
2766                    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2767                        return 0;
2768
2769                mtu = dst_mtu(dst->child);
2770                if (xdst->child_mtu_cached != mtu) {
2771                        last = xdst;
2772                        xdst->child_mtu_cached = mtu;
2773                }
2774
2775                if (!dst_check(xdst->route, xdst->route_cookie))
2776                        return 0;
2777                mtu = dst_mtu(xdst->route);
2778                if (xdst->route_mtu_cached != mtu) {
2779                        last = xdst;
2780                        xdst->route_mtu_cached = mtu;
2781                }
2782
2783                dst = dst->child;
2784        } while (dst->xfrm);
2785
2786        if (likely(!last))
2787                return 1;
2788
2789        mtu = last->child_mtu_cached;
2790        for (;;) {
2791                dst = &last->u.dst;
2792
2793                mtu = xfrm_state_mtu(dst->xfrm, mtu);
2794                if (mtu > last->route_mtu_cached)
2795                        mtu = last->route_mtu_cached;
2796                dst_metric_set(dst, RTAX_MTU, mtu);
2797
2798                if (last == first)
2799                        break;
2800
2801                last = (struct xfrm_dst *)last->u.dst.next;
2802                last->child_mtu_cached = mtu;
2803        }
2804
2805        return 1;
2806}
2807
2808static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
2809{
2810        return dst_metric_advmss(dst->path);
2811}
2812
2813static unsigned int xfrm_mtu(const struct dst_entry *dst)
2814{
2815        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2816
2817        return mtu ? : dst_mtu(dst->path);
2818}
2819
2820static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
2821                                           struct sk_buff *skb,
2822                                           const void *daddr)
2823{
2824        return dst->path->ops->neigh_lookup(dst, skb, daddr);
2825}
2826
2827int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2828{
2829        int err = 0;
2830        if (unlikely(afinfo == NULL))
2831                return -EINVAL;
2832        if (unlikely(afinfo->family >= NPROTO))
2833                return -EAFNOSUPPORT;
2834        spin_lock(&xfrm_policy_afinfo_lock);
2835        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2836                err = -EEXIST;
2837        else {
2838                struct dst_ops *dst_ops = afinfo->dst_ops;
2839                if (likely(dst_ops->kmem_cachep == NULL))
2840                        dst_ops->kmem_cachep = xfrm_dst_cache;
2841                if (likely(dst_ops->check == NULL))
2842                        dst_ops->check = xfrm_dst_check;
2843                if (likely(dst_ops->default_advmss == NULL))
2844                        dst_ops->default_advmss = xfrm_default_advmss;
2845                if (likely(dst_ops->mtu == NULL))
2846                        dst_ops->mtu = xfrm_mtu;
2847                if (likely(dst_ops->negative_advice == NULL))
2848                        dst_ops->negative_advice = xfrm_negative_advice;
2849                if (likely(dst_ops->link_failure == NULL))
2850                        dst_ops->link_failure = xfrm_link_failure;
2851                if (likely(dst_ops->neigh_lookup == NULL))
2852                        dst_ops->neigh_lookup = xfrm_neigh_lookup;
2853                if (likely(afinfo->garbage_collect == NULL))
2854                        afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2855                rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
2856        }
2857        spin_unlock(&xfrm_policy_afinfo_lock);
2858
2859        return err;
2860}
2861EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2862
2863int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2864{
2865        int err = 0;
2866        if (unlikely(afinfo == NULL))
2867                return -EINVAL;
2868        if (unlikely(afinfo->family >= NPROTO))
2869                return -EAFNOSUPPORT;
2870        spin_lock(&xfrm_policy_afinfo_lock);
2871        if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2872                if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2873                        err = -EINVAL;
2874                else
2875                        RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
2876                                         NULL);
2877        }
2878        spin_unlock(&xfrm_policy_afinfo_lock);
2879        if (!err) {
2880                struct dst_ops *dst_ops = afinfo->dst_ops;
2881
2882                synchronize_rcu();
2883
2884                dst_ops->kmem_cachep = NULL;
2885                dst_ops->check = NULL;
2886                dst_ops->negative_advice = NULL;
2887                dst_ops->link_failure = NULL;
2888                afinfo->garbage_collect = NULL;
2889        }
2890        return err;
2891}
2892EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2893
2894static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2895{
2896        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2897
2898        switch (event) {
2899        case NETDEV_DOWN:
2900                xfrm_garbage_collect(dev_net(dev));
2901        }
2902        return NOTIFY_DONE;
2903}
2904
2905static struct notifier_block xfrm_dev_notifier = {
2906        .notifier_call  = xfrm_dev_event,
2907};
2908
2909#ifdef CONFIG_XFRM_STATISTICS
2910static int __net_init xfrm_statistics_init(struct net *net)
2911{
2912        int rv;
2913        net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
2914        if (!net->mib.xfrm_statistics)
2915                return -ENOMEM;
2916        rv = xfrm_proc_init(net);
2917        if (rv < 0)
2918                free_percpu(net->mib.xfrm_statistics);
2919        return rv;
2920}
2921
2922static void xfrm_statistics_fini(struct net *net)
2923{
2924        xfrm_proc_fini(net);
2925        free_percpu(net->mib.xfrm_statistics);
2926}
2927#else
2928static int __net_init xfrm_statistics_init(struct net *net)
2929{
2930        return 0;
2931}
2932
/* Stub used when CONFIG_XFRM_STATISTICS is not set: nothing to tear down. */
static void xfrm_statistics_fini(struct net *net)
{
}
2936#endif
2937
2938static int __net_init xfrm_policy_init(struct net *net)
2939{
2940        unsigned int hmask, sz;
2941        int dir;
2942
2943        if (net_eq(net, &init_net))
2944                xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2945                                           sizeof(struct xfrm_dst),
2946                                           0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2947                                           NULL);
2948
2949        hmask = 8 - 1;
2950        sz = (hmask+1) * sizeof(struct hlist_head);
2951
2952        net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
2953        if (!net->xfrm.policy_byidx)
2954                goto out_byidx;
2955        net->xfrm.policy_idx_hmask = hmask;
2956
2957        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
2958                struct xfrm_policy_hash *htab;
2959
2960                net->xfrm.policy_count[dir] = 0;
2961                net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
2962                INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
2963
2964                htab = &net->xfrm.policy_bydst[dir];
2965                htab->table = xfrm_hash_alloc(sz);
2966                if (!htab->table)
2967                        goto out_bydst;
2968                htab->hmask = hmask;
2969                htab->dbits4 = 32;
2970                htab->sbits4 = 32;
2971                htab->dbits6 = 128;
2972                htab->sbits6 = 128;
2973        }
2974        net->xfrm.policy_hthresh.lbits4 = 32;
2975        net->xfrm.policy_hthresh.rbits4 = 32;
2976        net->xfrm.policy_hthresh.lbits6 = 128;
2977        net->xfrm.policy_hthresh.rbits6 = 128;
2978
2979        seqlock_init(&net->xfrm.policy_hthresh.lock);
2980
2981        INIT_LIST_HEAD(&net->xfrm.policy_all);
2982        INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
2983        INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
2984        if (net_eq(net, &init_net))
2985                register_netdevice_notifier(&xfrm_dev_notifier);
2986        return 0;
2987
2988out_bydst:
2989        for (dir--; dir >= 0; dir--) {
2990                struct xfrm_policy_hash *htab;
2991
2992                htab = &net->xfrm.policy_bydst[dir];
2993                xfrm_hash_free(htab->table, sz);
2994        }
2995        xfrm_hash_free(net->xfrm.policy_byidx, sz);
2996out_byidx:
2997        return -ENOMEM;
2998}
2999
3000static void xfrm_policy_fini(struct net *net)
3001{
3002        unsigned int sz;
3003        int dir;
3004
3005        flush_work(&net->xfrm.policy_hash_work);
3006#ifdef CONFIG_XFRM_SUB_POLICY
3007        xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
3008#endif
3009        xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
3010
3011        WARN_ON(!list_empty(&net->xfrm.policy_all));
3012
3013        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
3014                struct xfrm_policy_hash *htab;
3015
3016                WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
3017
3018                htab = &net->xfrm.policy_bydst[dir];
3019                sz = (htab->hmask + 1) * sizeof(struct hlist_head);
3020                WARN_ON(!hlist_empty(htab->table));
3021                xfrm_hash_free(htab->table, sz);
3022        }
3023
3024        sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
3025        WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
3026        xfrm_hash_free(net->xfrm.policy_byidx, sz);
3027}
3028
3029static int __net_init xfrm_net_init(struct net *net)
3030{
3031        int rv;
3032
3033        rv = xfrm_statistics_init(net);
3034        if (rv < 0)
3035                goto out_statistics;
3036        rv = xfrm_state_init(net);
3037        if (rv < 0)
3038                goto out_state;
3039        rv = xfrm_policy_init(net);
3040        if (rv < 0)
3041                goto out_policy;
3042        rv = xfrm_sysctl_init(net);
3043        if (rv < 0)
3044                goto out_sysctl;
3045        rv = flow_cache_init(net);
3046        if (rv < 0)
3047                goto out;
3048
3049        /* Initialize the per-net locks here */
3050        spin_lock_init(&net->xfrm.xfrm_state_lock);
3051        rwlock_init(&net->xfrm.xfrm_policy_lock);
3052        mutex_init(&net->xfrm.xfrm_cfg_mutex);
3053
3054        return 0;
3055
3056out:
3057        xfrm_sysctl_fini(net);
3058out_sysctl:
3059        xfrm_policy_fini(net);
3060out_policy:
3061        xfrm_state_fini(net);
3062out_state:
3063        xfrm_statistics_fini(net);
3064out_statistics:
3065        return rv;
3066}
3067
3068static void __net_exit xfrm_net_exit(struct net *net)
3069{
3070        flow_cache_fini(net);
3071        xfrm_sysctl_fini(net);
3072        xfrm_policy_fini(net);
3073        xfrm_state_fini(net);
3074        xfrm_statistics_fini(net);
3075}
3076
3077static struct pernet_operations __net_initdata xfrm_net_ops = {
3078        .init = xfrm_net_init,
3079        .exit = xfrm_net_exit,
3080};
3081
3082void __init xfrm_init(void)
3083{
3084        register_pernet_subsys(&xfrm_net_ops);
3085        xfrm_input_init();
3086}
3087
3088#ifdef CONFIG_AUDITSYSCALL
3089static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
3090                                         struct audit_buffer *audit_buf)
3091{
3092        struct xfrm_sec_ctx *ctx = xp->security;
3093        struct xfrm_selector *sel = &xp->selector;
3094
3095        if (ctx)
3096                audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
3097                                 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
3098
3099        switch (sel->family) {
3100        case AF_INET:
3101                audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
3102                if (sel->prefixlen_s != 32)
3103                        audit_log_format(audit_buf, " src_prefixlen=%d",
3104                                         sel->prefixlen_s);
3105                audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
3106                if (sel->prefixlen_d != 32)
3107                        audit_log_format(audit_buf, " dst_prefixlen=%d",
3108                                         sel->prefixlen_d);
3109                break;
3110        case AF_INET6:
3111                audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
3112                if (sel->prefixlen_s != 128)
3113                        audit_log_format(audit_buf, " src_prefixlen=%d",
3114                                         sel->prefixlen_s);
3115                audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
3116                if (sel->prefixlen_d != 128)
3117                        audit_log_format(audit_buf, " dst_prefixlen=%d",
3118                                         sel->prefixlen_d);
3119                break;
3120        }
3121}
3122
3123void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
3124{
3125        struct audit_buffer *audit_buf;
3126
3127        audit_buf = xfrm_audit_start("SPD-add");
3128        if (audit_buf == NULL)
3129                return;
3130        xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3131        audit_log_format(audit_buf, " res=%u", result);
3132        xfrm_audit_common_policyinfo(xp, audit_buf);
3133        audit_log_end(audit_buf);
3134}
3135EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
3136
3137void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
3138                              bool task_valid)
3139{
3140        struct audit_buffer *audit_buf;
3141
3142        audit_buf = xfrm_audit_start("SPD-delete");
3143        if (audit_buf == NULL)
3144                return;
3145        xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3146        audit_log_format(audit_buf, " res=%u", result);
3147        xfrm_audit_common_policyinfo(xp, audit_buf);
3148        audit_log_end(audit_buf);
3149}
3150EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
3151#endif
3152
3153#ifdef CONFIG_XFRM_MIGRATE
3154static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
3155                                        const struct xfrm_selector *sel_tgt)
3156{
3157        if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
3158                if (sel_tgt->family == sel_cmp->family &&
3159                    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
3160                                    sel_cmp->family) &&
3161                    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
3162                                    sel_cmp->family) &&
3163                    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
3164                    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3165                        return true;
3166                }
3167        } else {
3168                if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3169                        return true;
3170                }
3171        }
3172        return false;
3173}
3174
3175static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
3176                                                    u8 dir, u8 type, struct net *net)
3177{
3178        struct xfrm_policy *pol, *ret = NULL;
3179        struct hlist_head *chain;
3180        u32 priority = ~0U;
3181
3182        read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
3183        chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
3184        hlist_for_each_entry(pol, chain, bydst) {
3185                if (xfrm_migrate_selector_match(sel, &pol->selector) &&
3186                    pol->type == type) {
3187                        ret = pol;
3188                        priority = ret->priority;
3189                        break;
3190                }
3191        }
3192        chain = &net->xfrm.policy_inexact[dir];
3193        hlist_for_each_entry(pol, chain, bydst) {
3194                if ((pol->priority >= priority) && ret)
3195                        break;
3196
3197                if (xfrm_migrate_selector_match(sel, &pol->selector) &&
3198                    pol->type == type) {
3199                        ret = pol;
3200                        break;
3201                }
3202        }
3203
3204        xfrm_pol_hold(ret);
3205
3206        read_unlock_bh(&net->xfrm.xfrm_policy_lock);
3207
3208        return ret;
3209}
3210
3211static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3212{
3213        int match = 0;
3214
3215        if (t->mode == m->mode && t->id.proto == m->proto &&
3216            (m->reqid == 0 || t->reqid == m->reqid)) {
3217                switch (t->mode) {
3218                case XFRM_MODE_TUNNEL:
3219                case XFRM_MODE_BEET:
3220                        if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
3221                                            m->old_family) &&
3222                            xfrm_addr_equal(&t->saddr, &m->old_saddr,
3223                                            m->old_family)) {
3224                                match = 1;
3225                        }
3226                        break;
3227                case XFRM_MODE_TRANSPORT:
3228                        /* in case of transport mode, template does not store
3229                           any IP addresses, hence we just compare mode and
3230                           protocol */
3231                        match = 1;
3232                        break;
3233                default:
3234                        break;
3235                }
3236        }
3237        return match;
3238}
3239
3240/* update endpoint address(es) of template(s) */
3241static int xfrm_policy_migrate(struct xfrm_policy *pol,
3242                               struct xfrm_migrate *m, int num_migrate)
3243{
3244        struct xfrm_migrate *mp;
3245        int i, j, n = 0;
3246
3247        write_lock_bh(&pol->lock);
3248        if (unlikely(pol->walk.dead)) {
3249                /* target policy has been deleted */
3250                write_unlock_bh(&pol->lock);
3251                return -ENOENT;
3252        }
3253
3254        for (i = 0; i < pol->xfrm_nr; i++) {
3255                for (j = 0, mp = m; j < num_migrate; j++, mp++) {
3256                        if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
3257                                continue;
3258                        n++;
3259                        if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
3260                            pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
3261                                continue;
3262                        /* update endpoints */
3263                        memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
3264                               sizeof(pol->xfrm_vec[i].id.daddr));
3265                        memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
3266                               sizeof(pol->xfrm_vec[i].saddr));
3267                        pol->xfrm_vec[i].encap_family = mp->new_family;
3268                        /* flush bundles */
3269                        atomic_inc(&pol->genid);
3270                }
3271        }
3272
3273        write_unlock_bh(&pol->lock);
3274
3275        if (!n)
3276                return -ENODATA;
3277
3278        return 0;
3279}
3280
3281static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3282{
3283        int i, j;
3284
3285        if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
3286                return -EINVAL;
3287
3288        for (i = 0; i < num_migrate; i++) {
3289                if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
3290                                    m[i].old_family) &&
3291                    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
3292                                    m[i].old_family))
3293                        return -EINVAL;
3294                if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
3295                    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
3296                        return -EINVAL;
3297
3298                /* check if there is any duplicated entry */
3299                for (j = i + 1; j < num_migrate; j++) {
3300                        if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
3301                                    sizeof(m[i].old_daddr)) &&
3302                            !memcmp(&m[i].old_saddr, &m[j].old_saddr,
3303                                    sizeof(m[i].old_saddr)) &&
3304                            m[i].proto == m[j].proto &&
3305                            m[i].mode == m[j].mode &&
3306                            m[i].reqid == m[j].reqid &&
3307                            m[i].old_family == m[j].old_family)
3308                                return -EINVAL;
3309                }
3310        }
3311
3312        return 0;
3313}
3314
3315int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3316                 struct xfrm_migrate *m, int num_migrate,
3317                 struct xfrm_kmaddress *k, struct net *net)
3318{
3319        int i, err, nx_cur = 0, nx_new = 0;
3320        struct xfrm_policy *pol = NULL;
3321        struct xfrm_state *x, *xc;
3322        struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
3323        struct xfrm_state *x_new[XFRM_MAX_DEPTH];
3324        struct xfrm_migrate *mp;
3325
3326        if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
3327                goto out;
3328
3329        /* Stage 1 - find policy */
3330        if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
3331                err = -ENOENT;
3332                goto out;
3333        }
3334
3335        /* Stage 2 - find and update state(s) */
3336        for (i = 0, mp = m; i < num_migrate; i++, mp++) {
3337                if ((x = xfrm_migrate_state_find(mp, net))) {
3338                        x_cur[nx_cur] = x;
3339                        nx_cur++;
3340                        if ((xc = xfrm_state_migrate(x, mp))) {
3341                                x_new[nx_new] = xc;
3342                                nx_new++;
3343                        } else {
3344                                err = -ENODATA;
3345                                goto restore_state;
3346                        }
3347                }
3348        }
3349
3350        /* Stage 3 - update policy */
3351        if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
3352                goto restore_state;
3353
3354        /* Stage 4 - delete old state(s) */
3355        if (nx_cur) {
3356                xfrm_states_put(x_cur, nx_cur);
3357                xfrm_states_delete(x_cur, nx_cur);
3358        }
3359
3360        /* Stage 5 - announce */
3361        km_migrate(sel, dir, type, m, num_migrate, k);
3362
3363        xfrm_pol_put(pol);
3364
3365        return 0;
3366out:
3367        return err;
3368
3369restore_state:
3370        if (pol)
3371                xfrm_pol_put(pol);
3372        if (nx_cur)
3373                xfrm_states_put(x_cur, nx_cur);
3374        if (nx_new)
3375                xfrm_states_delete(x_new, nx_new);
3376
3377        return err;
3378}
3379EXPORT_SYMBOL(xfrm_migrate);
3380#endif
3381