linux/net/ipv4/route.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              ROUTE - implementation of the IP router.
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *              Alan Cox        :       Verify area fixes.
 *              Alan Cox        :       cli() protects routing changes
 *              Rui Oliveira    :       ICMP routing table updates
 *              (rco@di.uminho.pt)      Routing table insertion and update
 *              Linus Torvalds  :       Rewrote bits to be sensible
 *              Alan Cox        :       Added BSD route gw semantics
 *              Alan Cox        :       Super /proc >4K
 *              Alan Cox        :       MTU in route table
 *              Alan Cox        :       MSS actually. Also added the window
 *                                      clamper.
 *              Sam Lantinga    :       Fixed route matching in rt_del()
 *              Alan Cox        :       Routing cache support.
 *              Alan Cox        :       Removed compatibility cruft.
 *              Alan Cox        :       RTF_REJECT support.
 *              Alan Cox        :       TCP irtt support.
 *              Jonathan Naylor :       Added Metric support.
 *      Miquel van Smoorenburg  :       BSD API fixes.
 *      Miquel van Smoorenburg  :       Metrics.
 *              Alan Cox        :       Use __u32 properly
 *              Alan Cox        :       Aligned routing errors more closely with BSD,
 *                                      though our system is still very different.
 *              Alan Cox        :       Faster /proc handling
 *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
 *                                      routing caches and better behaviour.
 *
 *              Olaf Erb        :       irtt wasn't being copied right.
 *              Bjorn Ekwall    :       Kerneld route support.
 *              Alan Cox        :       Multicast fixed (I hope)
 *              Pavel Krauz     :       Limited broadcast fixed
 *              Mike McLagan    :       Routing by source
 *      Alexey Kuznetsov        :       End of old history. Split to fib.c and
 *                                      route.c and rewritten from scratch.
 *              Andi Kleen      :       Load-limit warning messages.
 *      Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
 *      Vitaly E. Lavrov        :       Race condition in ip_route_input_slow.
 *      Tobias Ringstrom        :       Uninitialized res.type in ip_route_output_slow.
 *      Vladimir V. Ivanov      :       IP rule info (flowid) is really useful.
 *              Marc Boucher    :       routing by fwmark
 *      Robert Olsson           :       Added rt_cache statistics
 *      Arnaldo C. Melo         :       Convert proc stuff to seq_file
 *      Eric Dumazet            :       hashed spinlocks and rt_check_expire() fixes.
 *      Ilia Sotnikov           :       Ignore TOS on PMTUD and Redirect
 *      Ilia Sotnikov           :       Removed TOS from hash calculations
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>

#include "fib_lookup.h"

#define RT_FL_TOS(oldflp4) \
        ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly  = 9;
static int ip_rt_redirect_load __read_mostly    = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly       = HZ;
static int ip_rt_error_burst __read_mostly      = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly      = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly         = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly       = 256;

static int ip_rt_gc_timeout __read_mostly       = RT_GC_TIMEOUT;

/*
 *      Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int      ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int      ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void              ipv4_link_failure(struct sk_buff *skb);
static void              ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                                           struct sk_buff *skb, u32 mtu);
static void              ip_do_redirect(struct dst_entry *dst, struct sock *sk,
                                        struct sk_buff *skb);
static void              ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
        WARN_ON(1);
        return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
                                           struct sk_buff *skb,
                                           const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);

static struct dst_ops ipv4_dst_ops = {
        .family =               AF_INET,
        .check =                ipv4_dst_check,
        .default_advmss =       ipv4_default_advmss,
        .mtu =                  ipv4_mtu,
        .cow_metrics =          ipv4_cow_metrics,
        .destroy =              ipv4_dst_destroy,
        .negative_advice =      ipv4_negative_advice,
        .link_failure =         ipv4_link_failure,
        .update_pmtu =          ip_rt_update_pmtu,
        .redirect =             ip_do_redirect,
        .local_out =            __ip_local_out,
        .neigh_lookup =         ipv4_neigh_lookup,
        .confirm_neigh =        ipv4_confirm_neigh,
};

#define ECN_OR_COST(class)      TC_PRIO_##class

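/* Map the IPv4 TOS class bits to a packet scheduler priority band.
 * The table is indexed with IPTOS_TOS(tos) >> 1 (see rt_tos2priority()),
 * so entries come in pairs that differ only in the lowest index bit,
 * a TOS bit nowadays shared with ECN; hence the ECN_OR_COST() aliases.
 */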
const __u8 ip_tos2prio[16] = {
        TC_PRIO_BESTEFFORT,
        ECN_OR_COST(BESTEFFORT),
        TC_PRIO_BESTEFFORT,
        ECN_OR_COST(BESTEFFORT),
        TC_PRIO_BULK,
        ECN_OR_COST(BULK),
        TC_PRIO_BULK,
        ECN_OR_COST(BULK),
        TC_PRIO_INTERACTIVE,
        ECN_OR_COST(INTERACTIVE),
        TC_PRIO_INTERACTIVE,
        ECN_OR_COST(INTERACTIVE),
        TC_PRIO_INTERACTIVE_BULK,
        ECN_OR_COST(INTERACTIVE_BULK),
        TC_PRIO_INTERACTIVE_BULK,
        ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
        if (*pos)
                return NULL;
        return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_printf(seq, "%-127s\n",
                           "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
                           "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
                           "HHUptod\tSpecDst");
        return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
        .start  = rt_cache_seq_start,
        .next   = rt_cache_seq_next,
        .stop   = rt_cache_seq_stop,
        .show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt_cache_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
        int cpu;

        if (*pos == 0)
                return SEQ_START_TOKEN;

        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return &per_cpu(rt_cache_stat, cpu);
        }
        return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        int cpu;

        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return &per_cpu(rt_cache_stat, cpu);
        }
        return NULL;
}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
        struct rt_cache_stat *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
                return 0;
        }

        seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
                   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
                   dst_entries_get_slow(&ipv4_dst_ops),
                   0, /* st->in_hit */
                   st->in_slow_tot,
                   st->in_slow_mc,
                   st->in_no_route,
                   st->in_brd,
                   st->in_martian_dst,
                   st->in_martian_src,

                   0, /* st->out_hit */
                   st->out_slow_tot,
                   st->out_slow_mc,

                   0, /* st->gc_total */
                   0, /* st->gc_ignored */
                   0, /* st->gc_goal_miss */
                   0, /* st->gc_dst_overflow */
                   0, /* st->in_hlist_search */
                   0  /* st->out_hlist_search */
                );
        return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
        .start  = rt_cpu_seq_start,
        .next   = rt_cpu_seq_next,
        .stop   = rt_cpu_seq_stop,
        .show   = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt_cpu_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
        struct ip_rt_acct *dst, *src;
        unsigned int i, j;

        dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
        if (!dst)
                return -ENOMEM;

        for_each_possible_cpu(i) {
                src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
                for (j = 0; j < 256; j++) {
                        dst[j].o_bytes   += src[j].o_bytes;
                        dst[j].o_packets += src[j].o_packets;
                        dst[j].i_bytes   += src[j].i_bytes;
                        dst[j].i_packets += src[j].i_packets;
                }
        }

        seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
        kfree(dst);
        return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
        return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
        .owner          = THIS_MODULE,
        .open           = rt_acct_proc_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
        struct proc_dir_entry *pde;

        pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
                          &rt_cache_seq_fops);
        if (!pde)
                goto err1;

        pde = proc_create("rt_cache", S_IRUGO,
                          net->proc_net_stat, &rt_cpu_seq_fops);
        if (!pde)
                goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
        pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
        if (!pde)
                goto err3;
#endif
        return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
        remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
        remove_proc_entry("rt_cache", net->proc_net);
err1:
        return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
        remove_proc_entry("rt_cache", net->proc_net_stat);
        remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
        remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
        .init = ip_rt_do_proc_init,
        .exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
        return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
        return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
        return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}

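/* Invalidate every cached route in @net at once. Bumping the generation
 * id makes rt_is_expired() return true for all routes created before the
 * bump; they are then dropped lazily by ipv4_dst_check().
 */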
void rt_cache_flush(struct net *net)
{
        rt_genid_bump_ipv4(net);
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
                                           struct sk_buff *skb,
                                           const void *daddr)
{
        struct net_device *dev = dst->dev;
        const __be32 *pkey = daddr;
        const struct rtable *rt;
        struct neighbour *n;

        rt = (const struct rtable *) dst;
        if (rt->rt_gateway)
                pkey = (const __be32 *) &rt->rt_gateway;
        else if (skb)
                pkey = &ip_hdr(skb)->daddr;

        n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
        if (n)
                return n;
        return neigh_create(&arp_tbl, pkey, dev);
}

static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
        struct net_device *dev = dst->dev;
        const __be32 *pkey = daddr;
        const struct rtable *rt;

        rt = (const struct rtable *)dst;
        if (rt->rt_gateway)
                pkey = (const __be32 *)&rt->rt_gateway;
        else if (!daddr ||
                 (rt->rt_flags &
                  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
                return;

        __ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}

#define IP_IDENTS_SZ 2048u

static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
        u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
        atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
        u32 old = ACCESS_ONCE(*p_tstamp);
        u32 now = (u32)jiffies;
        u32 new, delta = 0;

        if (old != now && cmpxchg(p_tstamp, old, now) == old)
                delta = prandom_u32_max(now - old);

        /* Do not use atomic_add_return() as it makes UBSAN unhappy */
        do {
                old = (u32)atomic_read(p_id);
                new = old + delta + segs;
        } while (atomic_cmpxchg(p_id, old, new) != old);

        return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);

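/* Fill in iph->id for a packet carrying @segs segments. The flow is
 * hashed (with a boot-time random key) into one of IP_IDENTS_SZ
 * generators, and a block of @segs ids is reserved from that generator
 * via ip_idents_reserve() above.
 */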
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
        static u32 ip_idents_hashrnd __read_mostly;
        u32 hash, id;

        net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));

        hash = jhash_3words((__force u32)iph->daddr,
                            (__force u32)iph->saddr,
                            iph->protocol ^ net_hash_mix(net),
                            ip_idents_hashrnd);
        id = ip_idents_reserve(hash, segs);
        iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

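/* Build a flowi4 key from an IP header. When a socket is supplied, its
 * bound device, mark, TOS and protocol override the values taken from
 * the packet, so the key matches what the socket itself would emit.
 */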
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
                             const struct sock *sk,
                             const struct iphdr *iph,
                             int oif, u8 tos,
                             u8 prot, u32 mark, int flow_flags)
{
        if (sk) {
                const struct inet_sock *inet = inet_sk(sk);

                oif = sk->sk_bound_dev_if;
                mark = sk->sk_mark;
                tos = RT_CONN_FLAGS(sk);
                prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
        }
        flowi4_init_output(fl4, oif, mark, tos,
                           RT_SCOPE_UNIVERSE, prot,
                           flow_flags,
                           iph->daddr, iph->saddr, 0, 0,
                           sock_net_uid(net, sk));
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
                               const struct sock *sk)
{
        const struct net *net = dev_net(skb->dev);
        const struct iphdr *iph = ip_hdr(skb);
        int oif = skb->dev->ifindex;
        u8 tos = RT_TOS(iph->tos);
        u8 prot = iph->protocol;
        u32 mark = skb->mark;

        __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ip_options_rcu *inet_opt;
        __be32 daddr = inet->inet_daddr;

        rcu_read_lock();
        inet_opt = rcu_dereference(inet->inet_opt);
        if (inet_opt && inet_opt->opt.srr)
                daddr = inet_opt->opt.faddr;
        flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
                           inet_sk_flowi_flags(sk),
                           daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
        rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
                                 const struct sk_buff *skb)
{
        if (skb)
                build_skb_flow_key(fl4, skb, sk);
        else
                build_sk_flow_key(fl4, sk);
}

static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
        struct rtable *rt;

        rt = rcu_dereference(fnhe->fnhe_rth_input);
        if (rt) {
                RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
                dst_dev_put(&rt->dst);
                dst_release(&rt->dst);
        }
        rt = rcu_dereference(fnhe->fnhe_rth_output);
        if (rt) {
                RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
                dst_dev_put(&rt->dst);
                dst_release(&rt->dst);
        }
}

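/* Select the least recently stamped exception in a bucket for reuse,
 * flushing its cached input/output routes first. The caller holds
 * fnhe_lock and reinitialises the returned entry in place.
 */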
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
        struct fib_nh_exception *fnhe, *oldest;

        oldest = rcu_dereference(hash->chain);
        for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
                        oldest = fnhe;
        }
        fnhe_flush_routes(oldest);
        return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
        static u32 fnhe_hashrnd __read_mostly;
        u32 hval;

        net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
        hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
        return hash_32(hval, FNHE_HASH_SHIFT);
}

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
        rt->rt_pmtu = fnhe->fnhe_pmtu;
        rt->dst.expires = fnhe->fnhe_expires;

        if (fnhe->fnhe_gw) {
                rt->rt_flags |= RTCF_REDIRECTED;
                rt->rt_gateway = fnhe->fnhe_gw;
                rt->rt_uses_gateway = 1;
        }
}

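/* Record state learned from ICMP (a redirect gateway and/or a path MTU)
 * as a fib_nh_exception on @nh, keyed by destination address. The hash
 * table and the entry are created on demand; when a new entry is
 * created, routes already cached against this nexthop are marked
 * DST_OBSOLETE_KILL so later lookups recheck the exception list.
 */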
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                                  u32 pmtu, unsigned long expires)
{
        struct fnhe_hash_bucket *hash;
        struct fib_nh_exception *fnhe;
        struct rtable *rt;
        unsigned int i;
        int depth;
        u32 hval = fnhe_hashfun(daddr);

        spin_lock_bh(&fnhe_lock);

        hash = rcu_dereference(nh->nh_exceptions);
        if (!hash) {
                hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
                if (!hash)
                        goto out_unlock;
                rcu_assign_pointer(nh->nh_exceptions, hash);
        }

        hash += hval;

        depth = 0;
        for (fnhe = rcu_dereference(hash->chain); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (fnhe->fnhe_daddr == daddr)
                        break;
                depth++;
        }

        if (fnhe) {
                if (gw)
                        fnhe->fnhe_gw = gw;
                if (pmtu) {
                        fnhe->fnhe_pmtu = pmtu;
                        fnhe->fnhe_expires = max(1UL, expires);
                }
                /* Update all cached dsts too */
                rt = rcu_dereference(fnhe->fnhe_rth_input);
                if (rt)
                        fill_route_from_fnhe(rt, fnhe);
                rt = rcu_dereference(fnhe->fnhe_rth_output);
                if (rt)
                        fill_route_from_fnhe(rt, fnhe);
        } else {
                if (depth > FNHE_RECLAIM_DEPTH)
                        fnhe = fnhe_oldest(hash);
                else {
                        fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
                        if (!fnhe)
                                goto out_unlock;

                        fnhe->fnhe_next = hash->chain;
                        rcu_assign_pointer(hash->chain, fnhe);
                }
                fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
                fnhe->fnhe_daddr = daddr;
                fnhe->fnhe_gw = gw;
                fnhe->fnhe_pmtu = pmtu;
                fnhe->fnhe_expires = expires;

                /* Exception created; mark the cached routes for the nexthop
                 * stale, so that anyone caching them rechecks whether this
                 * exception applies.
                 */
                rt = rcu_dereference(nh->nh_rth_input);
                if (rt)
                        rt->dst.obsolete = DST_OBSOLETE_KILL;

                for_each_possible_cpu(i) {
                        struct rtable __rcu **prt;

                        prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
                        rt = rcu_dereference(*prt);
                        if (rt)
                                rt->dst.obsolete = DST_OBSOLETE_KILL;
                }
        }

        fnhe->fnhe_stamp = jiffies;

out_unlock:
        spin_unlock_bh(&fnhe_lock);
}

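/* Process an ICMP redirect for @rt. The advertised gateway is accepted
 * only if the redirect came from the current gateway and the new
 * gateway is a sane on-link unicast address; it is then recorded as a
 * nexthop exception so subsequent lookups prefer it. Bogus redirects
 * are (optionally) logged and ignored.
 */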
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
                             bool kill_route)
{
        __be32 new_gw = icmp_hdr(skb)->un.gateway;
        __be32 old_gw = ip_hdr(skb)->saddr;
        struct net_device *dev = skb->dev;
        struct in_device *in_dev;
        struct fib_result res;
        struct neighbour *n;
        struct net *net;

        switch (icmp_hdr(skb)->code & 7) {
        case ICMP_REDIR_NET:
        case ICMP_REDIR_NETTOS:
        case ICMP_REDIR_HOST:
        case ICMP_REDIR_HOSTTOS:
                break;

        default:
                return;
        }

        if (rt->rt_gateway != old_gw)
                return;

        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev)
                return;

        net = dev_net(dev);
        if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
            ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
            ipv4_is_zeronet(new_gw))
                goto reject_redirect;

        if (!IN_DEV_SHARED_MEDIA(in_dev)) {
                if (!inet_addr_onlink(in_dev, new_gw, old_gw))
                        goto reject_redirect;
                if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
                        goto reject_redirect;
        } else {
                if (inet_addr_type(net, new_gw) != RTN_UNICAST)
                        goto reject_redirect;
        }

        n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
        if (!n)
                n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
        if (!IS_ERR(n)) {
                if (!(n->nud_state & NUD_VALID)) {
                        neigh_event_send(n, NULL);
                } else {
                        if (fib_lookup(net, fl4, &res, 0) == 0) {
                                struct fib_nh *nh = &FIB_RES_NH(res);

                                update_or_create_fnhe(nh, fl4->daddr, new_gw,
                                                0, jiffies + ip_rt_gc_timeout);
                        }
                        if (kill_route)
                                rt->dst.obsolete = DST_OBSOLETE_KILL;
                        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
                }
                neigh_release(n);
        }
        return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
        if (IN_DEV_LOG_MARTIANS(in_dev)) {
                const struct iphdr *iph = (const struct iphdr *) skb->data;
                __be32 daddr = iph->daddr;
                __be32 saddr = iph->saddr;

                net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
                                     "  Advised path = %pI4 -> %pI4\n",
                                     &old_gw, dev->name, &new_gw,
                                     &saddr, &daddr);
        }
#endif
        ;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
        struct rtable *rt;
        struct flowi4 fl4;
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct net *net = dev_net(skb->dev);
        int oif = skb->dev->ifindex;
        u8 tos = RT_TOS(iph->tos);
        u8 prot = iph->protocol;
        u32 mark = skb->mark;

        rt = (struct rtable *) dst;

        __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
        __ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
        struct rtable *rt = (struct rtable *)dst;
        struct dst_entry *ret = dst;

        if (rt) {
                if (dst->obsolete > 0) {
                        ip_rt_put(rt);
                        ret = NULL;
                } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
                           rt->dst.expires) {
                        ip_rt_put(rt);
                        ret = NULL;
                }
        }
        return ret;
}

/*
 * Algorithm:
 *      1. The first ip_rt_redirect_number redirects are sent
 *         with exponential backoff, then we stop sending them at all,
 *         assuming that the host ignores our redirects.
 *      2. If we did not see packets requiring redirects
 *         during ip_rt_redirect_silence, we assume that the host
 *         forgot the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{
        struct rtable *rt = skb_rtable(skb);
        struct in_device *in_dev;
        struct inet_peer *peer;
        struct net *net;
        int log_martians;
        int vif;

        rcu_read_lock();
        in_dev = __in_dev_get_rcu(rt->dst.dev);
        if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
                rcu_read_unlock();
                return;
        }
        log_martians = IN_DEV_LOG_MARTIANS(in_dev);
        vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
        rcu_read_unlock();

        net = dev_net(rt->dst.dev);
        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
        if (!peer) {
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
                          rt_nexthop(rt, ip_hdr(skb)->daddr));
                return;
        }

        /* No redirected packets during ip_rt_redirect_silence;
         * reset the algorithm.
         */
        if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
                peer->rate_tokens = 0;

        /* Too many ignored redirects; do not send anything.
         * Set peer->rate_last to the last seen redirected packet.
         */
        if (peer->rate_tokens >= ip_rt_redirect_number) {
                peer->rate_last = jiffies;
                goto out_put_peer;
        }

        /* Check for load limit; set rate_last to the latest sent
         * redirect.
         */
        if (peer->rate_tokens == 0 ||
            time_after(jiffies,
                       (peer->rate_last +
                        (ip_rt_redirect_load << peer->rate_tokens)))) {
                __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
                peer->rate_last = jiffies;
                ++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
                if (log_martians &&
                    peer->rate_tokens == ip_rt_redirect_number)
                        net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
                                             &ip_hdr(skb)->saddr, inet_iif(skb),
                                             &ip_hdr(skb)->daddr, &gw);
#endif
        }
out_put_peer:
        inet_putpeer(peer);
}

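/* Input-path error handler: update the relevant SNMP counters and,
 * subject to a token bucket kept in the inet_peer entry of the sender,
 * answer with the ICMP destination-unreachable code that matches
 * dst.error. Consumes the skb.
 */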
static int ip_error(struct sk_buff *skb)
{
        struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
        struct rtable *rt = skb_rtable(skb);
        struct inet_peer *peer;
        unsigned long now;
        struct net *net;
        bool send;
        int code;

        /* IP on this device is disabled. */
        if (!in_dev)
                goto out;

        net = dev_net(rt->dst.dev);
        if (!IN_DEV_FORWARD(in_dev)) {
                switch (rt->dst.error) {
                case EHOSTUNREACH:
                        __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
                        break;

                case ENETUNREACH:
                        __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
                        break;
                }
                goto out;
        }

        switch (rt->dst.error) {
        case EINVAL:
        default:
                goto out;
        case EHOSTUNREACH:
                code = ICMP_HOST_UNREACH;
                break;
        case ENETUNREACH:
                code = ICMP_NET_UNREACH;
                __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
                break;
        case EACCES:
                code = ICMP_PKT_FILTERED;
                break;
        }

        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
                               l3mdev_master_ifindex(skb->dev), 1);

        send = true;
        if (peer) {
                now = jiffies;
                peer->rate_tokens += now - peer->rate_last;
                if (peer->rate_tokens > ip_rt_error_burst)
                        peer->rate_tokens = ip_rt_error_burst;
                peer->rate_last = now;
                if (peer->rate_tokens >= ip_rt_error_cost)
                        peer->rate_tokens -= ip_rt_error_cost;
                else
                        send = false;
                inet_putpeer(peer);
        }
        if (send)
                icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:    kfree_skb(skb);
        return 0;
}

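/* Learn a new path MTU for the flow described by @fl4. The value is
 * clamped to ip_rt_min_pmtu, ignored if the MTU metric is locked or not
 * actually smaller than the current one, and stored as a nexthop
 * exception expiring after ip_rt_mtu_expires.
 */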
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
        struct dst_entry *dst = &rt->dst;
        struct fib_result res;

        if (dst_metric_locked(dst, RTAX_MTU))
                return;

        if (ipv4_mtu(dst) < mtu)
                return;

        if (mtu < ip_rt_min_pmtu)
                mtu = ip_rt_min_pmtu;

        if (rt->rt_pmtu == mtu &&
            time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
                return;

        rcu_read_lock();
        if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
                struct fib_nh *nh = &FIB_RES_NH(res);

                update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
                                      jiffies + ip_rt_mtu_expires);
        }
        rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                              struct sk_buff *skb, u32 mtu)
{
        struct rtable *rt = (struct rtable *) dst;
        struct flowi4 fl4;

        ip_rt_build_flow_key(&fl4, sk, skb);
        __ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
                      int oif, u32 mark, u8 protocol, int flow_flags)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;

        if (!mark)
                mark = IP4_REPLY_MARK(net, skb->mark);

        __build_flow_key(net, &fl4, NULL, iph, oif,
                         RT_TOS(iph->tos), protocol, mark, flow_flags);
        rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
                __ip_rt_update_pmtu(rt, &fl4, mtu);
                ip_rt_put(rt);
        }
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;

        __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);

        if (!fl4.flowi4_mark)
                fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);

        rt = __ip_route_output_key(sock_net(sk), &fl4);
        if (!IS_ERR(rt)) {
                __ip_rt_update_pmtu(rt, &fl4, mtu);
                ip_rt_put(rt);
        }
}

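/* Socket variant of PMTU learning: update the PMTU on the route this
 * socket has cached. If the cached dst is already stale, or the update
 * obsoletes it, a fresh route is looked up and installed with
 * sk_dst_set(). Falls back to __ipv4_sk_update_pmtu() when the socket
 * is owned by user context or has no cached dst.
 */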
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;
        struct dst_entry *odst = NULL;
        bool new = false;
        struct net *net = sock_net(sk);

        bh_lock_sock(sk);

        if (!ip_sk_accept_pmtu(sk))
                goto out;

        odst = sk_dst_get(sk);

        if (sock_owned_by_user(sk) || !odst) {
                __ipv4_sk_update_pmtu(skb, sk, mtu);
                goto out;
        }

        __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

        rt = (struct rtable *)odst;
        if (odst->obsolete && !odst->ops->check(odst, 0)) {
                rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
                if (IS_ERR(rt))
                        goto out;

                new = true;
        }

        __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

        if (!dst_check(&rt->dst, 0)) {
                if (new)
                        dst_release(&rt->dst);

                rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
                if (IS_ERR(rt))
                        goto out;

                new = true;
        }

        if (new)
                sk_dst_set(sk, &rt->dst);

out:
        bh_unlock_sock(sk);
        dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
                   int oif, u32 mark, u8 protocol, int flow_flags)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;

        __build_flow_key(net, &fl4, NULL, iph, oif,
                         RT_TOS(iph->tos), protocol, mark, flow_flags);
        rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
                __ip_do_redirect(rt, skb, &fl4, false);
                ip_rt_put(rt);
        }
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;
        struct net *net = sock_net(sk);

        __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
        rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
                __ip_do_redirect(rt, skb, &fl4, false);
                ip_rt_put(rt);
        }
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
        struct rtable *rt = (struct rtable *) dst;

        /* All IPV4 dsts are created with ->obsolete set to the value
         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
         * into this function always.
         *
         * When a PMTU/redirect information update invalidates a route,
         * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
         * DST_OBSOLETE_DEAD by dst_free().
         */
        if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
                return NULL;
        return dst;
}

static void ipv4_link_failure(struct sk_buff *skb)
{
        struct rtable *rt;

        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

        rt = skb_rtable(skb);
        if (rt)
                dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        pr_debug("%s: %pI4 -> %pI4, %s\n",
                 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
                 skb->dev ? skb->dev->name : "?");
        kfree_skb(skb);
        WARN_ON(1);
        return 0;
}

/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be unaligned
   in IP options!
 */

void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
        __be32 src;

        if (rt_is_output_route(rt))
                src = ip_hdr(skb)->saddr;
        else {
                struct fib_result res;
                struct flowi4 fl4;
                struct iphdr *iph;

                iph = ip_hdr(skb);

                memset(&fl4, 0, sizeof(fl4));
                fl4.daddr = iph->daddr;
                fl4.saddr = iph->saddr;
                fl4.flowi4_tos = RT_TOS(iph->tos);
                fl4.flowi4_oif = rt->dst.dev->ifindex;
                fl4.flowi4_iif = skb->dev->ifindex;
                fl4.flowi4_mark = skb->mark;

                rcu_read_lock();
                if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
                        src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
                else
                        src = inet_select_addr(rt->dst.dev,
                                               rt_nexthop(rt, iph->daddr),
                                               RT_SCOPE_UNIVERSE);
                rcu_read_unlock();
        }
        memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
        if (!(rt->dst.tclassid & 0xFFFF))
                rt->dst.tclassid |= tag & 0xFFFF;
        if (!(rt->dst.tclassid & 0xFFFF0000))
                rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
        unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
        unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
                                    ip_rt_min_advmss);

        return min(advmss, IPV4_MAX_PMTU - header_size);
}

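/* Effective MTU of a route: a still-valid learned PMTU wins, then an
 * explicit RTAX_MTU metric, then the device MTU (capped at 576 when the
 * metric is locked and the route uses a gateway), always reduced by any
 * lwtunnel encapsulation headroom.
 */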
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
        const struct rtable *rt = (const struct rtable *) dst;
        unsigned int mtu = rt->rt_pmtu;

        if (!mtu || time_after_eq(jiffies, rt->dst.expires))
                mtu = dst_metric_raw(dst, RTAX_MTU);

        if (mtu)
                return mtu;

        mtu = READ_ONCE(dst->dev->mtu);

        if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
                if (rt->rt_uses_gateway && mtu > 576)
                        mtu = 576;
        }

        mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

        return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}

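/* Look up the nexthop exception for @daddr on @nh, if any. Runs under
 * rcu_read_lock().
 */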
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
        struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
        struct fib_nh_exception *fnhe;
        u32 hval;

        if (!hash)
                return NULL;

        hval = fnhe_hashfun(daddr);

        for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (fnhe->fnhe_daddr == daddr)
                        return fnhe;
        }
        return NULL;
}

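/* Bind @rt to a matching nexthop exception so later lookups can reuse
 * it. A generation-id mismatch means the exception predates the last
 * flush; its learned state is then cleared before use. Returns true if
 * the route was cached in the exception.
 */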
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
                              __be32 daddr, const bool do_cache)
{
        bool ret = false;

        spin_lock_bh(&fnhe_lock);

        if (daddr == fnhe->fnhe_daddr) {
                struct rtable __rcu **porig;
                struct rtable *orig;
                int genid = fnhe_genid(dev_net(rt->dst.dev));

                if (rt_is_input_route(rt))
                        porig = &fnhe->fnhe_rth_input;
                else
                        porig = &fnhe->fnhe_rth_output;
                orig = rcu_dereference(*porig);

                if (fnhe->fnhe_genid != genid) {
                        fnhe->fnhe_genid = genid;
                        fnhe->fnhe_gw = 0;
                        fnhe->fnhe_pmtu = 0;
                        fnhe->fnhe_expires = 0;
                        fnhe_flush_routes(fnhe);
                        orig = NULL;
                }
                fill_route_from_fnhe(rt, fnhe);
                if (!rt->rt_gateway)
                        rt->rt_gateway = daddr;

                if (do_cache) {
                        dst_hold(&rt->dst);
                        rcu_assign_pointer(*porig, rt);
                        if (orig) {
                                dst_dev_put(&orig->dst);
                                dst_release(&orig->dst);
                        }
                        ret = true;
                }

                fnhe->fnhe_stamp = jiffies;
        }
        spin_unlock_bh(&fnhe_lock);

        return ret;
}

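/* Cache @rt on the nexthop itself: in nh_rth_input for input routes, or
 * in this CPU's slot of nh_pcpu_rth_output otherwise. The slot is
 * claimed with cmpxchg() and the previous occupant released; returns
 * false if another CPU won the race.
 */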
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
        struct rtable *orig, *prev, **p;
        bool ret = true;

        if (rt_is_input_route(rt)) {
                p = (struct rtable **)&nh->nh_rth_input;
        } else {
                p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
        }
        orig = *p;

        /* hold dst before doing cmpxchg() to avoid a race condition
         * on this dst
         */
        dst_hold(&rt->dst);
        prev = cmpxchg(p, orig, rt);
        if (prev == orig) {
                if (orig) {
                        dst_dev_put(&orig->dst);
                        dst_release(&orig->dst);
                }
        } else {
                dst_release(&rt->dst);
                ret = false;
        }

        return ret;
}

struct uncached_list {
        spinlock_t              lock;
        struct list_head        head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);

static void rt_add_uncached_list(struct rtable *rt)
{
        struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);

        rt->rt_uncached_list = ul;

        spin_lock_bh(&ul->lock);
        list_add_tail(&rt->rt_uncached, &ul->head);
        spin_unlock_bh(&ul->lock);
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
        struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
        struct rtable *rt = (struct rtable *) dst;

        if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
                kfree(p);

        if (!list_empty(&rt->rt_uncached)) {
                struct uncached_list *ul = rt->rt_uncached_list;

                spin_lock_bh(&ul->lock);
                list_del(&rt->rt_uncached);
                spin_unlock_bh(&ul->lock);
        }
}

void rt_flush_dev(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct rtable *rt;
        int cpu;

        for_each_possible_cpu(cpu) {
                struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

                spin_lock_bh(&ul->lock);
                list_for_each_entry(rt, &ul->head, rt_uncached) {
                        if (rt->dst.dev != dev)
                                continue;
                        rt->dst.dev = net->loopback_dev;
                        dev_hold(rt->dst.dev);
                        dev_put(dev);
                }
                spin_unlock_bh(&ul->lock);
        }
}

static bool rt_cache_valid(const struct rtable *rt)
{
        return rt &&
                rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
                !rt_is_expired(rt);
}

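/* Fill in the nexthop-derived fields of a freshly allocated route:
 * gateway, metrics, classid and lwtunnel state from the FIB result,
 * then try to cache the route in @fnhe or on the nexthop itself.
 * Routes that end up uncached go on the per-cpu uncached list so
 * rt_flush_dev() can still reach them.
 */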
1441static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1442                           const struct fib_result *res,
1443                           struct fib_nh_exception *fnhe,
1444                           struct fib_info *fi, u16 type, u32 itag,
1445                           const bool do_cache)
1446{
1447        bool cached = false;
1448
1449        if (fi) {
1450                struct fib_nh *nh = &FIB_RES_NH(*res);
1451
1452                if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
1453                        rt->rt_gateway = nh->nh_gw;
1454                        rt->rt_uses_gateway = 1;
1455                }
1456                dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
1457                if (fi->fib_metrics != &dst_default_metrics) {
1458                        rt->dst._metrics |= DST_METRICS_REFCOUNTED;
1459                        refcount_inc(&fi->fib_metrics->refcnt);
1460                }
1461#ifdef CONFIG_IP_ROUTE_CLASSID
1462                rt->dst.tclassid = nh->nh_tclassid;
1463#endif
1464                rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
1465                if (unlikely(fnhe))
1466                        cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
1467                else if (do_cache)
1468                        cached = rt_cache_route(nh, rt);
1469                if (unlikely(!cached)) {
1470                        /* Routes we intend to cache in nexthop exception or
1471                         * FIB nexthop have the DST_NOCACHE bit clear.
1472                         * However, if we are unsuccessful at storing this
1473                         * route into the cache we really need to set it.
1474                         */
1475                        if (!rt->rt_gateway)
1476                                rt->rt_gateway = daddr;
1477                        rt_add_uncached_list(rt);
1478                }
1479        } else
1480                rt_add_uncached_list(rt);
1481
1482#ifdef CONFIG_IP_ROUTE_CLASSID
1483#ifdef CONFIG_IP_MULTIPLE_TABLES
1484        set_class_tag(rt, res->tclassid);
1485#endif
1486        set_class_tag(rt, itag);
1487#endif
1488}
1489
1490struct rtable *rt_dst_alloc(struct net_device *dev,
1491                            unsigned int flags, u16 type,
1492                            bool nopolicy, bool noxfrm, bool will_cache)
1493{
1494        struct rtable *rt;
1495
1496        rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1497                       (will_cache ? 0 : DST_HOST) |
1498                       (nopolicy ? DST_NOPOLICY : 0) |
1499                       (noxfrm ? DST_NOXFRM : 0));
1500
1501        if (rt) {
1502                rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1503                rt->rt_flags = flags;
1504                rt->rt_type = type;
1505                rt->rt_is_input = 0;
1506                rt->rt_iif = 0;
1507                rt->rt_pmtu = 0;
1508                rt->rt_gateway = 0;
1509                rt->rt_uses_gateway = 0;
1510                rt->rt_table_id = 0;
1511                INIT_LIST_HEAD(&rt->rt_uncached);
1512
1513                rt->dst.output = ip_output;
1514                if (flags & RTCF_LOCAL)
1515                        rt->dst.input = ip_local_deliver;
1516        }
1517
1518        return rt;
1519}
1520EXPORT_SYMBOL(rt_dst_alloc);
1521
1522/* called in rcu_read_lock() section */
1523int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1524                          u8 tos, struct net_device *dev,
1525                          struct in_device *in_dev, u32 *itag)
1526{
1527        int err;
1528
1529        /* Primary sanity checks. */
1530        if (!in_dev)
1531                return -EINVAL;
1532
1533        if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1534            skb->protocol != htons(ETH_P_IP))
1535                return -EINVAL;
1536
1537        if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
1538                return -EINVAL;
1539
1540        if (ipv4_is_zeronet(saddr)) {
1541                if (!ipv4_is_local_multicast(daddr))
1542                        return -EINVAL;
1543        } else {
1544                err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1545                                          in_dev, itag);
1546                if (err < 0)
1547                        return err;
1548        }
1549        return 0;
1550}
1551
1552/* called in rcu_read_lock() section */
1553static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1554                             u8 tos, struct net_device *dev, int our)
1555{
1556        struct in_device *in_dev = __in_dev_get_rcu(dev);
1557        unsigned int flags = RTCF_MULTICAST;
1558        struct rtable *rth;
1559        u32 itag = 0;
1560        int err;
1561
1562        err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1563        if (err)
1564                return err;
1565
1566        if (our)
1567                flags |= RTCF_LOCAL;
1568
1569        rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
1570                           IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
1571        if (!rth)
1572                return -ENOBUFS;
1573
1574#ifdef CONFIG_IP_ROUTE_CLASSID
1575        rth->dst.tclassid = itag;
1576#endif
1577        rth->dst.output = ip_rt_bug;
1578        rth->rt_is_input = 1;
1579
1580#ifdef CONFIG_IP_MROUTE
1581        if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1582                rth->dst.input = ip_mr_input;
1583#endif
1584        RT_CACHE_STAT_INC(in_slow_mc);
1585
1586        skb_dst_set(skb, &rth->dst);
1587        return 0;
1588}
1589
1590
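    /* Log a martian source as recommended by RFC1812 and account it in
     * the per-cpu route cache statistics.
     */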
1591static void ip_handle_martian_source(struct net_device *dev,
1592                                     struct in_device *in_dev,
1593                                     struct sk_buff *skb,
1594                                     __be32 daddr,
1595                                     __be32 saddr)
1596{
1597        RT_CACHE_STAT_INC(in_martian_src);
1598#ifdef CONFIG_IP_ROUTE_VERBOSE
1599        if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1600                /*
1601                 *      RFC1812 recommendation: if the source is martian,
1602                 *      the only hint we can log is the MAC header.
1603                 */
1604                pr_warn("martian source %pI4 from %pI4, on dev %s\n",
1605                        &daddr, &saddr, dev->name);
1606                if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1607                        print_hex_dump(KERN_WARNING, "ll header: ",
1608                                       DUMP_PREFIX_OFFSET, 16, 1,
1609                                       skb_mac_header(skb),
1610                                       dev->hard_header_len, true);
1611                }
1612        }
1613#endif
1614}
1615
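    /* Unlink and free the nexthop exception for @daddr, flushing any
     * routes cached on it.  Serialised by fnhe_lock; lookups proceed
     * concurrently under RCU.
     */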
1616static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1617{
1618        struct fnhe_hash_bucket *hash;
1619        struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1620        u32 hval = fnhe_hashfun(daddr);
1621
1622        spin_lock_bh(&fnhe_lock);
1623
1624        hash = rcu_dereference_protected(nh->nh_exceptions,
1625                                         lockdep_is_held(&fnhe_lock));
1626        hash += hval;
1627
1628        fnhe_p = &hash->chain;
1629        fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1630        while (fnhe) {
1631                if (fnhe->fnhe_daddr == daddr) {
1632                        rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1633                                fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1634                        fnhe_flush_routes(fnhe);
1635                        kfree_rcu(fnhe, rcu);
1636                        break;
1637                }
1638                fnhe_p = &fnhe->fnhe_next;
1639                fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1640                                                 lockdep_is_held(&fnhe_lock));
1641        }
1642
1643        spin_unlock_bh(&fnhe_lock);
1644}
1645
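    /* If the route carries lightweight tunnel state, divert dst input
     * and/or output through the lwtunnel handlers, saving the original
     * functions in orig_input/orig_output.
     */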
1646static void set_lwt_redirect(struct rtable *rth)
1647{
1648        if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
1649                rth->dst.lwtstate->orig_output = rth->dst.output;
1650                rth->dst.output = lwtunnel_output;
1651        }
1652
1653        if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1654                rth->dst.lwtstate->orig_input = rth->dst.input;
1655                rth->dst.input = lwtunnel_input;
1656        }
1657}
1658
1659/* called in rcu_read_lock() section */
1660static int __mkroute_input(struct sk_buff *skb,
1661                           const struct fib_result *res,
1662                           struct in_device *in_dev,
1663                           __be32 daddr, __be32 saddr, u32 tos)
1664{
1665        struct fib_nh_exception *fnhe;
1666        struct rtable *rth;
1667        int err;
1668        struct in_device *out_dev;
1669        bool do_cache;
1670        u32 itag = 0;
1671
1672        /* get a working reference to the output device */
1673        out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
1674        if (!out_dev) {
1675                net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
1676                return -EINVAL;
1677        }
1678
1679        err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
1680                                  in_dev->dev, in_dev, &itag);
1681        if (err < 0) {
1682                ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1683                                         saddr);
1684
1685                goto cleanup;
1686        }
1687
1688        do_cache = res->fi && !itag;
1689        if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
1690            skb->protocol == htons(ETH_P_IP) &&
1691            (IN_DEV_SHARED_MEDIA(out_dev) ||
1692             inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1693                IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1694
1695        if (skb->protocol != htons(ETH_P_IP)) {
1696                /* Not IP (i.e. ARP). Do not create a route if it is
1697                 * invalid for proxy ARP. DNAT routes are always valid.
1698                 *
1699                 * The proxy ARP feature has been extended to allow ARP
1700                 * replies back out the same interface, to support
1701                 * Private VLAN switch technologies. See arp.c.
1702                 */
1703                if (out_dev == in_dev &&
1704                    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
1705                        err = -EINVAL;
1706                        goto cleanup;
1707                }
1708        }
1709
1710        fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1711        if (do_cache) {
1712                if (fnhe) {
1713                        rth = rcu_dereference(fnhe->fnhe_rth_input);
1714                        if (rth && rth->dst.expires &&
1715                            time_after(jiffies, rth->dst.expires)) {
1716                                ip_del_fnhe(&FIB_RES_NH(*res), daddr);
1717                                fnhe = NULL;
1718                        } else {
1719                                goto rt_cache;
1720                        }
1721                }
1722
1723                rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1724
1725rt_cache:
1726                if (rt_cache_valid(rth)) {
1727                        skb_dst_set_noref(skb, &rth->dst);
1728                        goto out;
1729                }
1730        }
1731
1732        rth = rt_dst_alloc(out_dev->dev, 0, res->type,
1733                           IN_DEV_CONF_GET(in_dev, NOPOLICY),
1734                           IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
1735        if (!rth) {
1736                err = -ENOBUFS;
1737                goto cleanup;
1738        }
1739
1740        rth->rt_is_input = 1;
1741        if (res->table)
1742                rth->rt_table_id = res->table->tb_id;
1743        RT_CACHE_STAT_INC(in_slow_tot);
1744
1745        rth->dst.input = ip_forward;
1746
1747        rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
1748                       do_cache);
1749        set_lwt_redirect(rth);
1750        skb_dst_set(skb, &rth->dst);
1751out:
1752        err = 0;
1753 cleanup:
1754        return err;
1755}
1756
1757#ifdef CONFIG_IP_ROUTE_MULTIPATH
1758/* To make ICMP packets follow the right flow, the multipath hash is
1759 * calculated from the inner IP addresses.
1760 */
1761static void ip_multipath_l3_keys(const struct sk_buff *skb,
1762                                 struct flow_keys *hash_keys)
1763{
1764        const struct iphdr *outer_iph = ip_hdr(skb);
1765        const struct iphdr *inner_iph;
1766        const struct icmphdr *icmph;
1767        struct iphdr _inner_iph;
1768        struct icmphdr _icmph;
1769
1770        hash_keys->addrs.v4addrs.src = outer_iph->saddr;
1771        hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
1772        if (likely(outer_iph->protocol != IPPROTO_ICMP))
1773                return;
1774
1775        if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
1776                return;
1777
1778        icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
1779                                   &_icmph);
1780        if (!icmph)
1781                return;
1782
1783        if (icmph->type != ICMP_DEST_UNREACH &&
1784            icmph->type != ICMP_REDIRECT &&
1785            icmph->type != ICMP_TIME_EXCEEDED &&
1786            icmph->type != ICMP_PARAMETERPROB)
1787                return;
1788
1789        inner_iph = skb_header_pointer(skb,
1790                                       outer_iph->ihl * 4 + sizeof(_icmph),
1791                                       sizeof(_inner_iph), &_inner_iph);
1792        if (!inner_iph)
1793                return;
1794        hash_keys->addrs.v4addrs.src = inner_iph->saddr;
1795        hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
1796}
1797
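    /* The hash policy below follows net.ipv4.fib_multipath_hash_policy:
     *   0 - layer-3 hashing over the source and destination addresses
     *   1 - layer-4 hashing over addresses, ports and the IP protocol
     * The final ">> 1" keeps the result within the 31-bit range that
     * fib_select_multipath() compares against the nexthop upper bounds.
     */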
1798/* If skb is set it will be used and fl4 can be NULL. */
1799int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
1800                       const struct sk_buff *skb)
1801{
1802        struct net *net = fi->fib_net;
1803        struct flow_keys hash_keys;
1804        u32 mhash;
1805
1806        switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
1807        case 0:
1808                memset(&hash_keys, 0, sizeof(hash_keys));
1809                hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1810                if (skb) {
1811                        ip_multipath_l3_keys(skb, &hash_keys);
1812                } else {
1813                        hash_keys.addrs.v4addrs.src = fl4->saddr;
1814                        hash_keys.addrs.v4addrs.dst = fl4->daddr;
1815                }
1816                break;
1817        case 1:
1818                /* skb is currently provided only when forwarding */
1819                if (skb) {
1820                        unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1821                        struct flow_keys keys;
1822
1823                        /* short-circuit if we already have L4 hash present */
1824                        if (skb->l4_hash)
1825                                return skb_get_hash_raw(skb) >> 1;
1826                        memset(&hash_keys, 0, sizeof(hash_keys));
1827                        skb_flow_dissect_flow_keys(skb, &keys, flag);
1828                        hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
1829                        hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
1830                        hash_keys.ports.src = keys.ports.src;
1831                        hash_keys.ports.dst = keys.ports.dst;
1832                        hash_keys.basic.ip_proto = keys.basic.ip_proto;
1833                } else {
1834                        memset(&hash_keys, 0, sizeof(hash_keys));
1835                        hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1836                        hash_keys.addrs.v4addrs.src = fl4->saddr;
1837                        hash_keys.addrs.v4addrs.dst = fl4->daddr;
1838                        hash_keys.ports.src = fl4->fl4_sport;
1839                        hash_keys.ports.dst = fl4->fl4_dport;
1840                        hash_keys.basic.ip_proto = fl4->flowi4_proto;
1841                }
1842                break;
1843        }
1844        mhash = flow_hash_from_keys(&hash_keys);
1845
1846        return mhash >> 1;
1847}
1848EXPORT_SYMBOL_GPL(fib_multipath_hash);
1849#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1850
1851static int ip_mkroute_input(struct sk_buff *skb,
1852                            struct fib_result *res,
1853                            struct in_device *in_dev,
1854                            __be32 daddr, __be32 saddr, u32 tos)
1855{
1856#ifdef CONFIG_IP_ROUTE_MULTIPATH
1857        if (res->fi && res->fi->fib_nhs > 1) {
1858                int h = fib_multipath_hash(res->fi, NULL, skb);
1859
1860                fib_select_multipath(res, h);
1861        }
1862#endif
1863
1864        /* create a routing cache entry */
1865        return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
1866}
1867
1868/*
1869 *      NOTE. We drop all packets that have a local source
1870 *      address, because every properly looped-back packet must
1871 *      already have the correct destination attached by the output routine.
1872 *
1873 *      This approach solves two big problems:
1874 *      1. Non-simplex devices are handled properly.
1875 *      2. IP spoofing attempts are filtered with a 100% guarantee.
1876 *      called with rcu_read_lock()
1877 */
1878
1879static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1880                               u8 tos, struct net_device *dev,
1881                               struct fib_result *res)
1882{
1883        struct in_device *in_dev = __in_dev_get_rcu(dev);
1884        struct ip_tunnel_info *tun_info;
1885        struct flowi4   fl4;
1886        unsigned int    flags = 0;
1887        u32             itag = 0;
1888        struct rtable   *rth;
1889        int             err = -EINVAL;
1890        struct net    *net = dev_net(dev);
1891        bool do_cache;
1892
1893        /* IP on this device is disabled. */
1894
1895        if (!in_dev)
1896                goto out;
1897
1898        /* Check for the weirdest martians, which cannot be detected
1899           by fib_lookup.
1900         */
1901
1902        tun_info = skb_tunnel_info(skb);
1903        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1904                fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
1905        else
1906                fl4.flowi4_tun_key.tun_id = 0;
1907        skb_dst_drop(skb);
1908
1909        if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
1910                goto martian_source;
1911
1912        res->fi = NULL;
1913        res->table = NULL;
1914        if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
1915                goto brd_input;
1916
1917        /* Accept zero addresses only for limited broadcast;
1918         * I do not even know whether to fix this or not. Waiting for complaints :-)
1919         */
1920        if (ipv4_is_zeronet(saddr))
1921                goto martian_source;
1922
1923        if (ipv4_is_zeronet(daddr))
1924                goto martian_destination;
1925
1926        /* The following code avoids calling IN_DEV_NET_ROUTE_LOCALNET() twice,
1927         * invoking it at most once when daddr and/or saddr is a loopback address.
1928         */
1929        if (ipv4_is_loopback(daddr)) {
1930                if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
1931                        goto martian_destination;
1932        } else if (ipv4_is_loopback(saddr)) {
1933                if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
1934                        goto martian_source;
1935        }
1936
1937        /*
1938         *      Now we are ready to route packet.
1939         */
1940        fl4.flowi4_oif = 0;
1941        fl4.flowi4_iif = dev->ifindex;
1942        fl4.flowi4_mark = skb->mark;
1943        fl4.flowi4_tos = tos;
1944        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1945        fl4.flowi4_flags = 0;
1946        fl4.daddr = daddr;
1947        fl4.saddr = saddr;
1948        fl4.flowi4_uid = sock_net_uid(net, NULL);
1949        err = fib_lookup(net, &fl4, res, 0);
1950        if (err != 0) {
1951                if (!IN_DEV_FORWARD(in_dev))
1952                        err = -EHOSTUNREACH;
1953                goto no_route;
1954        }
1955
1956        if (res->type == RTN_BROADCAST)
1957                goto brd_input;
1958
1959        if (res->type == RTN_LOCAL) {
1960                err = fib_validate_source(skb, saddr, daddr, tos,
1961                                          0, dev, in_dev, &itag);
1962                if (err < 0)
1963                        goto martian_source;
1964                goto local_input;
1965        }
1966
1967        if (!IN_DEV_FORWARD(in_dev)) {
1968                err = -EHOSTUNREACH;
1969                goto no_route;
1970        }
1971        if (res->type != RTN_UNICAST)
1972                goto martian_destination;
1973
1974        err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
1975out:    return err;
1976
1977brd_input:
1978        if (skb->protocol != htons(ETH_P_IP))
1979                goto e_inval;
1980
1981        if (!ipv4_is_zeronet(saddr)) {
1982                err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1983                                          in_dev, &itag);
1984                if (err < 0)
1985                        goto martian_source;
1986        }
1987        flags |= RTCF_BROADCAST;
1988        res->type = RTN_BROADCAST;
1989        RT_CACHE_STAT_INC(in_brd);
1990
1991local_input:
1992        do_cache = false;
1993        if (res->fi) {
1994                if (!itag) {
1995                        rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1996                        if (rt_cache_valid(rth)) {
1997                                skb_dst_set_noref(skb, &rth->dst);
1998                                err = 0;
1999                                goto out;
2000                        }
2001                        do_cache = true;
2002                }
2003        }
2004
2005        rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
2006                           flags | RTCF_LOCAL, res->type,
2007                           IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
2008        if (!rth)
2009                goto e_nobufs;
2010
2011        rth->dst.output = ip_rt_bug;
2012#ifdef CONFIG_IP_ROUTE_CLASSID
2013        rth->dst.tclassid = itag;
2014#endif
2015        rth->rt_is_input = 1;
2016        if (res->table)
2017                rth->rt_table_id = res->table->tb_id;
2018
2019        RT_CACHE_STAT_INC(in_slow_tot);
2020        if (res->type == RTN_UNREACHABLE) {
2021                rth->dst.input = ip_error;
2022                rth->dst.error = -err;
2023                rth->rt_flags &= ~RTCF_LOCAL;
2024        }
2025
2026        if (do_cache) {
2027                struct fib_nh *nh = &FIB_RES_NH(*res);
2028
2029                rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
2030                if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2031                        WARN_ON(rth->dst.input == lwtunnel_input);
2032                        rth->dst.lwtstate->orig_input = rth->dst.input;
2033                        rth->dst.input = lwtunnel_input;
2034                }
2035
2036                if (unlikely(!rt_cache_route(nh, rth)))
2037                        rt_add_uncached_list(rth);
2038        }
2039        skb_dst_set(skb, &rth->dst);
2040        err = 0;
2041        goto out;
2042
2043no_route:
2044        RT_CACHE_STAT_INC(in_no_route);
2045        res->type = RTN_UNREACHABLE;
2046        res->fi = NULL;
2047        res->table = NULL;
2048        goto local_input;
2049
2050        /*
2051         *      Do not cache martian addresses: they should be logged (RFC1812)
2052         */
2053martian_destination:
2054        RT_CACHE_STAT_INC(in_martian_dst);
2055#ifdef CONFIG_IP_ROUTE_VERBOSE
2056        if (IN_DEV_LOG_MARTIANS(in_dev))
2057                net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2058                                     &daddr, &saddr, dev->name);
2059#endif
2060
2061e_inval:
2062        err = -EINVAL;
2063        goto out;
2064
2065e_nobufs:
2066        err = -ENOBUFS;
2067        goto out;
2068
2069martian_source:
2070        ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2071        goto out;
2072}
2073
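    /* Resolve the input route for @skb and attach it as a noref dst.
     * Takes rcu_read_lock() around ip_route_input_rcu() itself and
     * returns 0 or a negative errno.  A typical (hypothetical) caller,
     * with iph pointing at the packet's IP header:
     *
     *	err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
     *				   iph->tos, skb->dev);
     *	if (unlikely(err))
     *		goto drop;
     */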
2074int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2075                         u8 tos, struct net_device *dev)
2076{
2077        struct fib_result res;
2078        int err;
2079
2080        tos &= IPTOS_RT_MASK;
2081        rcu_read_lock();
2082        err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
2083        rcu_read_unlock();
2084
2085        return err;
2086}
2087EXPORT_SYMBOL(ip_route_input_noref);
2088
2089/* called with rcu_read_lock held */
2090int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2091                       u8 tos, struct net_device *dev, struct fib_result *res)
2092{
2093        /* Multicast recognition logic was moved from the route cache to here.
2094           The problem was that too many Ethernet cards have broken/missing
2095           hardware multicast filters :-( As a result, a host on a multicast
2096           network acquired a lot of useless route cache entries, e.g. for
2097           SDR messages from all over the world. Now we try to get rid of them.
2098           Really, provided the software IP multicast filter is organized
2099           reasonably (at least, hashed), it does not result in a slowdown
2100           compared with route cache reject entries.
2101           Note that multicast routers are not affected, because a
2102           route cache entry is created eventually.
2103         */
2104        if (ipv4_is_multicast(daddr)) {
2105                struct in_device *in_dev = __in_dev_get_rcu(dev);
2106                int our = 0;
2107                int err = -EINVAL;
2108
2109                if (in_dev)
2110                        our = ip_check_mc_rcu(in_dev, daddr, saddr,
2111                                              ip_hdr(skb)->protocol);
2112
2113                /* check l3 master if no match yet */
2114                if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
2115                        struct in_device *l3_in_dev;
2116
2117                        l3_in_dev = __in_dev_get_rcu(skb->dev);
2118                        if (l3_in_dev)
2119                                our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2120                                                      ip_hdr(skb)->protocol);
2121                }
2122
2123                if (our
2124#ifdef CONFIG_IP_MROUTE
2125                        ||
2126                    (!ipv4_is_local_multicast(daddr) &&
2127                     IN_DEV_MFORWARD(in_dev))
2128#endif
2129                   ) {
2130                        err = ip_route_input_mc(skb, daddr, saddr,
2131                                                tos, dev, our);
2132                }
2133                return err;
2134        }
2135
2136        return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
2137}
2138
2139/* called with rcu_read_lock() */
2140static struct rtable *__mkroute_output(const struct fib_result *res,
2141                                       const struct flowi4 *fl4, int orig_oif,
2142                                       struct net_device *dev_out,
2143                                       unsigned int flags)
2144{
2145        struct fib_info *fi = res->fi;
2146        struct fib_nh_exception *fnhe;
2147        struct in_device *in_dev;
2148        u16 type = res->type;
2149        struct rtable *rth;
2150        bool do_cache;
2151
2152        in_dev = __in_dev_get_rcu(dev_out);
2153        if (!in_dev)
2154                return ERR_PTR(-EINVAL);
2155
2156        if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
2157                if (ipv4_is_loopback(fl4->saddr) &&
2158                    !(dev_out->flags & IFF_LOOPBACK) &&
2159                    !netif_is_l3_master(dev_out))
2160                        return ERR_PTR(-EINVAL);
2161
2162        if (ipv4_is_lbcast(fl4->daddr))
2163                type = RTN_BROADCAST;
2164        else if (ipv4_is_multicast(fl4->daddr))
2165                type = RTN_MULTICAST;
2166        else if (ipv4_is_zeronet(fl4->daddr))
2167                return ERR_PTR(-EINVAL);
2168
2169        if (dev_out->flags & IFF_LOOPBACK)
2170                flags |= RTCF_LOCAL;
2171
2172        do_cache = true;
2173        if (type == RTN_BROADCAST) {
2174                flags |= RTCF_BROADCAST | RTCF_LOCAL;
2175                fi = NULL;
2176        } else if (type == RTN_MULTICAST) {
2177                flags |= RTCF_MULTICAST | RTCF_LOCAL;
2178                if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2179                                     fl4->flowi4_proto))
2180                        flags &= ~RTCF_LOCAL;
2181                else
2182                        do_cache = false;
2183                /* If a multicast route does not exist, use the
2184                 * default one, but do not use a gateway in this case.
2185                 * Yes, it is a hack.
2186                 */
2187                if (fi && res->prefixlen < 4)
2188                        fi = NULL;
2189        } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2190                   (orig_oif != dev_out->ifindex)) {
2191                /* For local routes that require a particular output interface
2192                 * we do not want to cache the result.  Caching the result
2193                 * causes incorrect behaviour when there are multiple source
2194                 * addresses on the interface: if the intended recipient is
2195                 * waiting on that interface for the packet, it won't receive
2196                 * it, because the packet will be delivered on the loopback
2197                 * interface and the IP_PKTINFO ipi_ifindex will be set to
2198                 * the loopback interface as well.
2199                 */
2200                fi = NULL;
2201        }
2202
2203        fnhe = NULL;
2204        do_cache &= fi != NULL;
2205        if (do_cache) {
2206                struct rtable __rcu **prth;
2207                struct fib_nh *nh = &FIB_RES_NH(*res);
2208
2209                fnhe = find_exception(nh, fl4->daddr);
2210                if (fnhe) {
2211                        prth = &fnhe->fnhe_rth_output;
2212                        rth = rcu_dereference(*prth);
2213                        if (rth && rth->dst.expires &&
2214                            time_after(jiffies, rth->dst.expires)) {
2215                                ip_del_fnhe(nh, fl4->daddr);
2216                                fnhe = NULL;
2217                        } else {
2218                                goto rt_cache;
2219                        }
2220                }
2221
2222                if (unlikely(fl4->flowi4_flags &
2223                             FLOWI_FLAG_KNOWN_NH &&
2224                             !(nh->nh_gw &&
2225                               nh->nh_scope == RT_SCOPE_LINK))) {
2226                        do_cache = false;
2227                        goto add;
2228                }
2229                prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
2230                rth = rcu_dereference(*prth);
2231
2232rt_cache:
2233                if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
2234                        return rth;
2235        }
2236
2237add:
2238        rth = rt_dst_alloc(dev_out, flags, type,
2239                           IN_DEV_CONF_GET(in_dev, NOPOLICY),
2240                           IN_DEV_CONF_GET(in_dev, NOXFRM),
2241                           do_cache);
2242        if (!rth)
2243                return ERR_PTR(-ENOBUFS);
2244
2245        rth->rt_iif = orig_oif;
2246        if (res->table)
2247                rth->rt_table_id = res->table->tb_id;
2248
2249        RT_CACHE_STAT_INC(out_slow_tot);
2250
2251        if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2252                if (flags & RTCF_LOCAL &&
2253                    !(dev_out->flags & IFF_LOOPBACK)) {
2254                        rth->dst.output = ip_mc_output;
2255                        RT_CACHE_STAT_INC(out_slow_mc);
2256                }
2257#ifdef CONFIG_IP_MROUTE
2258                if (type == RTN_MULTICAST) {
2259                        if (IN_DEV_MFORWARD(in_dev) &&
2260                            !ipv4_is_local_multicast(fl4->daddr)) {
2261                                rth->dst.input = ip_mr_input;
2262                                rth->dst.output = ip_mc_output;
2263                        }
2264                }
2265#endif
2266        }
2267
2268        rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
2269        set_lwt_redirect(rth);
2270
2271        return rth;
2272}
2273
2274/*
2275 * Major route resolver routine.
2276 */
2277
2278struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2279                                        const struct sk_buff *skb)
2280{
2281        __u8 tos = RT_FL_TOS(fl4);
2282        struct fib_result res;
2283        struct rtable *rth;
2284
2285        res.tclassid    = 0;
2286        res.fi          = NULL;
2287        res.table       = NULL;
2288
2289        fl4->flowi4_iif = LOOPBACK_IFINDEX;
2290        fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2291        fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2292                         RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
2293
2294        rcu_read_lock();
2295        rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
2296        rcu_read_unlock();
2297
2298        return rth;
2299}
2300EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
2301
2302struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
2303                                            struct fib_result *res,
2304                                            const struct sk_buff *skb)
2305{
2306        struct net_device *dev_out = NULL;
2307        int orig_oif = fl4->flowi4_oif;
2308        unsigned int flags = 0;
2309        struct rtable *rth;
2310        int err = -ENETUNREACH;
2311
2312        if (fl4->saddr) {
2313                rth = ERR_PTR(-EINVAL);
2314                if (ipv4_is_multicast(fl4->saddr) ||
2315                    ipv4_is_lbcast(fl4->saddr) ||
2316                    ipv4_is_zeronet(fl4->saddr))
2317                        goto out;
2318
2319                /* I removed the check for oif == dev_out->oif here.
2320                   It was wrong for two reasons:
2321                   1. ip_dev_find(net, saddr) can return the wrong iface if
2322                      saddr is assigned to multiple interfaces.
2323                   2. Moreover, we are allowed to send packets with the saddr
2324                      of another iface. --ANK
2325                 */
2326
2327                if (fl4->flowi4_oif == 0 &&
2328                    (ipv4_is_multicast(fl4->daddr) ||
2329                     ipv4_is_lbcast(fl4->daddr))) {
2330                        /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2331                        dev_out = __ip_dev_find(net, fl4->saddr, false);
2332                        if (!dev_out)
2333                                goto out;
2334
2335                        /* Special hack: the user can direct multicasts
2336                           and limited broadcast via the necessary interface
2337                           without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2338                           This hack is not just for fun, it allows
2339                           vic, vat and friends to work.
2340                           They bind a socket to loopback, set the ttl to zero
2341                           and expect that it will work.
2342                           From the viewpoint of the routing cache they are
2343                           broken, because we are not allowed to build a
2344                           multicast path with a loopback source addr (the
2345                           routing cache cannot know that the ttl is zero, so
2346                           the packet will not leave this host and the route
2347                           is valid). Luckily, this hack is a good workaround.
2348                         */
2349
2350                        fl4->flowi4_oif = dev_out->ifindex;
2351                        goto make_route;
2352                }
2353
2354                if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2355                        /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2356                        if (!__ip_dev_find(net, fl4->saddr, false))
2357                                goto out;
2358                }
2359        }
2360
2361
2362        if (fl4->flowi4_oif) {
2363                dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
2364                rth = ERR_PTR(-ENODEV);
2365                if (!dev_out)
2366                        goto out;
2367
2368                /* RACE: Check return value of inet_select_addr instead. */
2369                if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
2370                        rth = ERR_PTR(-ENETUNREACH);
2371                        goto out;
2372                }
2373                if (ipv4_is_local_multicast(fl4->daddr) ||
2374                    ipv4_is_lbcast(fl4->daddr) ||
2375                    fl4->flowi4_proto == IPPROTO_IGMP) {
2376                        if (!fl4->saddr)
2377                                fl4->saddr = inet_select_addr(dev_out, 0,
2378                                                              RT_SCOPE_LINK);
2379                        goto make_route;
2380                }
2381                if (!fl4->saddr) {
2382                        if (ipv4_is_multicast(fl4->daddr))
2383                                fl4->saddr = inet_select_addr(dev_out, 0,
2384                                                              fl4->flowi4_scope);
2385                        else if (!fl4->daddr)
2386                                fl4->saddr = inet_select_addr(dev_out, 0,
2387                                                              RT_SCOPE_HOST);
2388                }
2389        }
2390
2391        if (!fl4->daddr) {
2392                fl4->daddr = fl4->saddr;
2393                if (!fl4->daddr)
2394                        fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
2395                dev_out = net->loopback_dev;
2396                fl4->flowi4_oif = LOOPBACK_IFINDEX;
2397                res->type = RTN_LOCAL;
2398                flags |= RTCF_LOCAL;
2399                goto make_route;
2400        }
2401
2402        err = fib_lookup(net, fl4, res, 0);
2403        if (err) {
2404                res->fi = NULL;
2405                res->table = NULL;
2406                if (fl4->flowi4_oif &&
2407                    (ipv4_is_multicast(fl4->daddr) ||
2408                    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
2409                        /* Apparently, the routing tables are wrong. Assume
2410                           that the destination is on-link.
2411
2412                           WHY? DW.
2413                           Because we are allowed to send to an iface
2414                           even if it has NO routes and NO assigned
2415                           addresses. When an oif is specified, the routing
2416                           tables are looked up with only one purpose:
2417                           to catch whether the destination is gatewayed,
2418                           rather than direct. Moreover, if MSG_DONTROUTE is
2419                           set, we send the packet, ignoring both the routing
2420                           tables and the ifaddr state. --ANK
2421
2422
2423                           We could do this even if the oif is unknown,
2424                           as IPv6 likely does, but we do not.
2425                         */
2426
2427                        if (fl4->saddr == 0)
2428                                fl4->saddr = inet_select_addr(dev_out, 0,
2429                                                              RT_SCOPE_LINK);
2430                        res->type = RTN_UNICAST;
2431                        goto make_route;
2432                }
2433                rth = ERR_PTR(err);
2434                goto out;
2435        }
2436
2437        if (res->type == RTN_LOCAL) {
2438                if (!fl4->saddr) {
2439                        if (res->fi->fib_prefsrc)
2440                                fl4->saddr = res->fi->fib_prefsrc;
2441                        else
2442                                fl4->saddr = fl4->daddr;
2443                }
2444
2445                /* L3 master device is the loopback for that domain */
2446                dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
2447                        net->loopback_dev;
2448
2449                /* make sure orig_oif points to fib result device even
2450                 * though packet rx/tx happens over loopback or l3mdev
2451                 */
2452                orig_oif = FIB_RES_OIF(*res);
2453
2454                fl4->flowi4_oif = dev_out->ifindex;
2455                flags |= RTCF_LOCAL;
2456                goto make_route;
2457        }
2458
2459        fib_select_path(net, res, fl4, skb);
2460
2461        dev_out = FIB_RES_DEV(*res);
2462        fl4->flowi4_oif = dev_out->ifindex;
2463
2464
2465make_route:
2466        rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
2467
2468out:
2469        return rth;
2470}
2471
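    /* dst_ops for blackhole routes: update_pmtu and redirect are
     * deliberate no-ops, cow_metrics never allocates, and dst_check
     * always reports the entry as invalid so users re-resolve it.
     */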
2472static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2473{
2474        return NULL;
2475}
2476
2477static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
2478{
2479        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2480
2481        return mtu ? : dst->dev->mtu;
2482}
2483
2484static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2485                                          struct sk_buff *skb, u32 mtu)
2486{
2487}
2488
2489static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2490                                       struct sk_buff *skb)
2491{
2492}
2493
2494static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2495                                          unsigned long old)
2496{
2497        return NULL;
2498}
2499
2500static struct dst_ops ipv4_dst_blackhole_ops = {
2501        .family                 =       AF_INET,
2502        .check                  =       ipv4_blackhole_dst_check,
2503        .mtu                    =       ipv4_blackhole_mtu,
2504        .default_advmss         =       ipv4_default_advmss,
2505        .update_pmtu            =       ipv4_rt_blackhole_update_pmtu,
2506        .redirect               =       ipv4_rt_blackhole_redirect,
2507        .cow_metrics            =       ipv4_rt_blackhole_cow_metrics,
2508        .neigh_lookup           =       ipv4_neigh_lookup,
2509};
2510
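    /* Clone @dst_orig into a blackhole route that preserves the original
     * route's identity fields but discards every packet sent through it.
     * Used e.g. by the IPsec/xfrm code when output must be suppressed
     * without returning an error to the socket.
     */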
2511struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2512{
2513        struct rtable *ort = (struct rtable *) dst_orig;
2514        struct rtable *rt;
2515
2516        rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
2517        if (rt) {
2518                struct dst_entry *new = &rt->dst;
2519
2520                new->__use = 1;
2521                new->input = dst_discard;
2522                new->output = dst_discard_out;
2523
2524                new->dev = net->loopback_dev;
2525                if (new->dev)
2526                        dev_hold(new->dev);
2527
2528                rt->rt_is_input = ort->rt_is_input;
2529                rt->rt_iif = ort->rt_iif;
2530                rt->rt_pmtu = ort->rt_pmtu;
2531
2532                rt->rt_genid = rt_genid_ipv4(net);
2533                rt->rt_flags = ort->rt_flags;
2534                rt->rt_type = ort->rt_type;
2535                rt->rt_gateway = ort->rt_gateway;
2536                rt->rt_uses_gateway = ort->rt_uses_gateway;
2537
2538                INIT_LIST_HEAD(&rt->rt_uncached);
2539        }
2540
2541        dst_release(dst_orig);
2542
2543        return rt ? &rt->dst : ERR_PTR(-ENOMEM);
2544}
2545
2546struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
2547                                    const struct sock *sk)
2548{
2549        struct rtable *rt = __ip_route_output_key(net, flp4);
2550
2551        if (IS_ERR(rt))
2552                return rt;
2553
2554        if (flp4->flowi4_proto)
2555                rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2556                                                        flowi4_to_flowi(flp4),
2557                                                        sk, 0);
2558
2559        return rt;
2560}
2561EXPORT_SYMBOL_GPL(ip_route_output_flow);
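    /* A minimal, hypothetical usage sketch for ip_route_output_flow()
     * (net, daddr and sk are caller-supplied; error handling follows
     * the usual IS_ERR() convention, and the reference is dropped with
     * ip_rt_put()):
     *
     *	struct flowi4 fl4 = {
     *		.daddr        = daddr,
     *		.flowi4_proto = IPPROTO_UDP,
     *	};
     *	struct rtable *rt = ip_route_output_flow(net, &fl4, sk);
     *
     *	if (IS_ERR(rt))
     *		return PTR_ERR(rt);
     *	...
     *	ip_rt_put(rt);
     */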
2562
2563/* called with rcu_read_lock held */
2564static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
2565                        struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
2566                        u32 seq)
2567{
2568        struct rtable *rt = skb_rtable(skb);
2569        struct rtmsg *r;
2570        struct nlmsghdr *nlh;
2571        unsigned long expires = 0;
2572        u32 error;
2573        u32 metrics[RTAX_MAX];
2574
2575        nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
2576        if (!nlh)
2577                return -EMSGSIZE;
2578
2579        r = nlmsg_data(nlh);
2580        r->rtm_family    = AF_INET;
2581        r->rtm_dst_len  = 32;
2582        r->rtm_src_len  = 0;
2583        r->rtm_tos      = fl4->flowi4_tos;
2584        r->rtm_table    = table_id < 256 ? table_id : RT_TABLE_COMPAT;
2585        if (nla_put_u32(skb, RTA_TABLE, table_id))
2586                goto nla_put_failure;
2587        r->rtm_type     = rt->rt_type;
2588        r->rtm_scope    = RT_SCOPE_UNIVERSE;
2589        r->rtm_protocol = RTPROT_UNSPEC;
2590        r->rtm_flags    = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2591        if (rt->rt_flags & RTCF_NOTIFY)
2592                r->rtm_flags |= RTM_F_NOTIFY;
2593        if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2594                r->rtm_flags |= RTCF_DOREDIRECT;
2595
2596        if (nla_put_in_addr(skb, RTA_DST, dst))
2597                goto nla_put_failure;
2598        if (src) {
2599                r->rtm_src_len = 32;
2600                if (nla_put_in_addr(skb, RTA_SRC, src))
2601                        goto nla_put_failure;
2602        }
2603        if (rt->dst.dev &&
2604            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2605                goto nla_put_failure;
2606#ifdef CONFIG_IP_ROUTE_CLASSID
2607        if (rt->dst.tclassid &&
2608            nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2609                goto nla_put_failure;
2610#endif
2611        if (!rt_is_input_route(rt) &&
2612            fl4->saddr != src) {
2613                if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
2614                        goto nla_put_failure;
2615        }
2616        if (rt->rt_uses_gateway &&
2617            nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
2618                goto nla_put_failure;
2619
2620        expires = rt->dst.expires;
2621        if (expires) {
2622                unsigned long now = jiffies;
2623
2624                if (time_before(now, expires))
2625                        expires -= now;
2626                else
2627                        expires = 0;
2628        }
2629
2630        memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2631        if (rt->rt_pmtu && expires)
2632                metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2633        if (rtnetlink_put_metrics(skb, metrics) < 0)
2634                goto nla_put_failure;
2635
2636        if (fl4->flowi4_mark &&
2637            nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
2638                goto nla_put_failure;
2639
2640        if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2641            nla_put_u32(skb, RTA_UID,
2642                        from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2643                goto nla_put_failure;
2644
2645        error = rt->dst.error;
2646
2647        if (rt_is_input_route(rt)) {
2648#ifdef CONFIG_IP_MROUTE
2649                if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2650                    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2651                        int err = ipmr_get_route(net, skb,
2652                                                 fl4->saddr, fl4->daddr,
2653                                                 r, portid);
2654
2655                        if (err <= 0) {
2656                                if (err == 0)
2657                                        return 0;
2658                                goto nla_put_failure;
2659                        }
2660                } else
2661#endif
2662                        if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
2663                                goto nla_put_failure;
2664        }
2665
2666        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
2667                goto nla_put_failure;
2668
2669        nlmsg_end(skb, nlh);
2670        return 0;
2671
2672nla_put_failure:
2673        nlmsg_cancel(skb, nlh);
2674        return -EMSGSIZE;
2675}
2676
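    /* RTM_GETROUTE handler: builds a dummy skb describing the requested
     * flow, performs the input or output route lookup, and replies to
     * the caller with a single RTM_NEWROUTE message.
     */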
2677static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2678                             struct netlink_ext_ack *extack)
2679{
2680        struct net *net = sock_net(in_skb->sk);
2681        struct rtmsg *rtm;
2682        struct nlattr *tb[RTA_MAX+1];
2683        struct fib_result res = {};
2684        struct rtable *rt = NULL;
2685        struct flowi4 fl4;
2686        __be32 dst = 0;
2687        __be32 src = 0;
2688        u32 iif;
2689        int err;
2690        int mark;
2691        struct sk_buff *skb;
2692        u32 table_id = RT_TABLE_MAIN;
2693        kuid_t uid;
2694
2695        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
2696                          extack);
2697        if (err < 0)
2698                goto errout;
2699
2700        rtm = nlmsg_data(nlh);
2701
2702        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2703        if (!skb) {
2704                err = -ENOBUFS;
2705                goto errout;
2706        }
2707
2708        /* Reserve room for dummy headers; this skb can pass
2709           through a good chunk of the routing engine.
2710         */
2711        skb_reset_mac_header(skb);
2712        skb_reset_network_header(skb);
2713
2714        src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2715        dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2716        iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2717        mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
2718        if (tb[RTA_UID])
2719                uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2720        else
2721                uid = (iif ? INVALID_UID : current_uid());
2722
2723        /* Bugfix: need to give ip_route_input enough of an IP header to
2724         * not gag.
2725         */
2726        ip_hdr(skb)->protocol = IPPROTO_UDP;
2727        ip_hdr(skb)->saddr = src;
2728        ip_hdr(skb)->daddr = dst;
2729
2730        skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2731
2732        memset(&fl4, 0, sizeof(fl4));
2733        fl4.daddr = dst;
2734        fl4.saddr = src;
2735        fl4.flowi4_tos = rtm->rtm_tos;
2736        fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2737        fl4.flowi4_mark = mark;
2738        fl4.flowi4_uid = uid;
2739
2740        rcu_read_lock();
2741
2742        if (iif) {
2743                struct net_device *dev;
2744
2745                dev = dev_get_by_index_rcu(net, iif);
2746                if (!dev) {
2747                        err = -ENODEV;
2748                        goto errout_free;
2749                }
2750
2751                skb->protocol   = htons(ETH_P_IP);
2752                skb->dev        = dev;
2753                skb->mark       = mark;
2754                err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
2755                                         dev, &res);
2756
2757                rt = skb_rtable(skb);
2758                if (err == 0 && rt->dst.error)
2759                        err = -rt->dst.error;
2760        } else {
2761                rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
2762                err = 0;
2763                if (IS_ERR(rt))
2764                        err = PTR_ERR(rt);
2765                else
2766                        skb_dst_set(skb, &rt->dst);
2767        }
2768
2769        if (err)
2770                goto errout_free;
2771
2772        if (rtm->rtm_flags & RTM_F_NOTIFY)
2773                rt->rt_flags |= RTCF_NOTIFY;
2774
2775        if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
2776                table_id = rt->rt_table_id;
2777
2778        if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
2779                if (!res.fi) {
2780                        err = fib_props[res.type].error;
2781                        if (!err)
2782                                err = -EHOSTUNREACH;
2783                        goto errout_free;
2784                }
2785                err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
2786                                    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
2787                                    rt->rt_type, res.prefix, res.prefixlen,
2788                                    fl4.flowi4_tos, res.fi, 0);
2789        } else {
2790                err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
2791                                   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
2792        }
2793        if (err < 0)
2794                goto errout_free;
2795
2796        rcu_read_unlock();
2797
2798        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2799errout:
2800        return err;
2801
2802errout_free:
2803        rcu_read_unlock();
2804        kfree_skb(skb);
2805        goto errout;
2806}
2807
2808void ip_rt_multicast_event(struct in_device *in_dev)
2809{
2810        rt_cache_flush(dev_net(in_dev->dev));
2811}
2812
2813#ifdef CONFIG_SYSCTL
2814static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
2815static int ip_rt_gc_min_interval __read_mostly  = HZ / 2;
2816static int ip_rt_gc_elasticity __read_mostly    = 8;
2817
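    /* Handler for the net.ipv4.route.flush sysctl: any write flushes the
     * route cache and bumps the fnhe genid, invalidating cached nexthop
     * exceptions.  Reads are rejected.
     */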
2818static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
2819                                        void __user *buffer,
2820                                        size_t *lenp, loff_t *ppos)
2821{
2822        struct net *net = (struct net *)__ctl->extra1;
2823
2824        if (write) {
2825                rt_cache_flush(net);
2826                fnhe_genid_bump(net);
2827                return 0;
2828        }
2829
2830        return -EINVAL;
2831}
2832
2833static struct ctl_table ipv4_route_table[] = {
2834        {
2835                .procname       = "gc_thresh",
2836                .data           = &ipv4_dst_ops.gc_thresh,
2837                .maxlen         = sizeof(int),
2838                .mode           = 0644,
2839                .proc_handler   = proc_dointvec,
2840        },
2841        {
2842                .procname       = "max_size",
2843                .data           = &ip_rt_max_size,
2844                .maxlen         = sizeof(int),
2845                .mode           = 0644,
2846                .proc_handler   = proc_dointvec,
2847        },
2848        {
2849                /*  Deprecated. Use gc_min_interval_ms */
2850
2851                .procname       = "gc_min_interval",
2852                .data           = &ip_rt_gc_min_interval,
2853                .maxlen         = sizeof(int),
2854                .mode           = 0644,
2855                .proc_handler   = proc_dointvec_jiffies,
2856        },
2857        {
2858                .procname       = "gc_min_interval_ms",
2859                .data           = &ip_rt_gc_min_interval,
2860                .maxlen         = sizeof(int),
2861                .mode           = 0644,
2862                .proc_handler   = proc_dointvec_ms_jiffies,
2863        },
2864        {
2865                .procname       = "gc_timeout",
2866                .data           = &ip_rt_gc_timeout,
2867                .maxlen         = sizeof(int),
2868                .mode           = 0644,
2869                .proc_handler   = proc_dointvec_jiffies,
2870        },
2871        {
2872                .procname       = "gc_interval",
2873                .data           = &ip_rt_gc_interval,
2874                .maxlen         = sizeof(int),
2875                .mode           = 0644,
2876                .proc_handler   = proc_dointvec_jiffies,
2877        },
2878        {
2879                .procname       = "redirect_load",
2880                .data           = &ip_rt_redirect_load,
2881                .maxlen         = sizeof(int),
2882                .mode           = 0644,
2883                .proc_handler   = proc_dointvec,
2884        },
2885        {
2886                .procname       = "redirect_number",
2887                .data           = &ip_rt_redirect_number,
2888                .maxlen         = sizeof(int),
2889                .mode           = 0644,
2890                .proc_handler   = proc_dointvec,
2891        },
2892        {
2893                .procname       = "redirect_silence",
2894                .data           = &ip_rt_redirect_silence,
2895                .maxlen         = sizeof(int),
2896                .mode           = 0644,
2897                .proc_handler   = proc_dointvec,
2898        },
2899        {
2900                .procname       = "error_cost",
2901                .data           = &ip_rt_error_cost,
2902                .maxlen         = sizeof(int),
2903                .mode           = 0644,
2904                .proc_handler   = proc_dointvec,
2905        },
2906        {
2907                .procname       = "error_burst",
2908                .data           = &ip_rt_error_burst,
2909                .maxlen         = sizeof(int),
2910                .mode           = 0644,
2911                .proc_handler   = proc_dointvec,
2912        },
2913        {
2914                .procname       = "gc_elasticity",
2915                .data           = &ip_rt_gc_elasticity,
2916                .maxlen         = sizeof(int),
2917                .mode           = 0644,
2918                .proc_handler   = proc_dointvec,
2919        },
2920        {
2921                .procname       = "mtu_expires",
2922                .data           = &ip_rt_mtu_expires,
2923                .maxlen         = sizeof(int),
2924                .mode           = 0644,
2925                .proc_handler   = proc_dointvec_jiffies,
2926        },
2927        {
2928                .procname       = "min_pmtu",
2929                .data           = &ip_rt_min_pmtu,
2930                .maxlen         = sizeof(int),
2931                .mode           = 0644,
2932                .proc_handler   = proc_dointvec,
2933        },
2934        {
2935                .procname       = "min_adv_mss",
2936                .data           = &ip_rt_min_advmss,
2937                .maxlen         = sizeof(int),
2938                .mode           = 0644,
2939                .proc_handler   = proc_dointvec,
2940        },
2941        { }
2942};
2943
2944static struct ctl_table ipv4_route_flush_table[] = {
2945        {
2946                .procname       = "flush",
2947                .maxlen         = sizeof(int),
2948                .mode           = 0200,
2949                .proc_handler   = ipv4_sysctl_rtcache_flush,
2950        },
2951        { },
2952};
2953
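    /* Per-netns registration of the flush sysctl.  Namespaces other than
     * init_net get their own copy of the table, and the entry is hidden
     * from unprivileged user namespaces.
     */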
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (!net->ipv4.route_hdr)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

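/*
 * Tear-down mirrors sysctl_route_net_init().  The BUG_ON() records the
 * assumption that this is never reached for init_net, whose table is
 * the static one and must not be kfree()d.
 */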
static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif

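/*
 * Per-namespace generation counters.  Bumping rt_genid invalidates every
 * cached route in the namespace; fnhe_genid does the same for next-hop
 * exception entries.  dev_addr_genid, bumped when device addresses
 * change, is seeded randomly so namespaces do not start from a
 * predictable value.
 */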
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
	return 0;
}

static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};

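/*
 * Each namespace also owns an inet_peer tree, caching long-lived
 * per-remote-host state (for instance ICMP rate-limiting information)
 * that outlives any individual route.
 */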
static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	=	ipv4_inetpeer_init,
	.exit	=	ipv4_inetpeer_exit,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */

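/*
 * Boot-time initialization, called once from the IPv4 init path.  The
 * allocations below are essential to IPv4 routing, so failure here is
 * fatal and handled by panic().
 */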
int __init ip_rt_init(void)
{
	int rc = 0;
	int cpu;

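	/*
	 * ip_idents/ip_tstamps back IP ID generation: each bucket pairs a
	 * counter with the time it was last used, and the counters start
	 * from random values so the IDs are hard to predict.
	 */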
	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

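	/*
	 * rt_uncached_list tracks routes that are not kept in the nexthop
	 * caches so they can still be found, and their device references
	 * dropped, when a device is unregistered.  One lock/list pair per
	 * cpu keeps contention down.
	 */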
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct),
				    __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

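	/*
	 * With the old routing cache gone there is nothing left to garbage
	 * collect, so the GC threshold and cache size are pinned at
	 * "unlimited".
	 */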
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}

#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif