linux/net/ipv4/netfilter/ip_tables.c
<<
>>
Prefs
   1/*
   2 * Packet matching code.
   3 *
   4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
   5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
   6 * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 */
  12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13#include <linux/cache.h>
  14#include <linux/capability.h>
  15#include <linux/skbuff.h>
  16#include <linux/kmod.h>
  17#include <linux/vmalloc.h>
  18#include <linux/netdevice.h>
  19#include <linux/module.h>
  20#include <linux/icmp.h>
  21#include <net/ip.h>
  22#include <net/compat.h>
  23#include <asm/uaccess.h>
  24#include <linux/mutex.h>
  25#include <linux/proc_fs.h>
  26#include <linux/err.h>
  27#include <linux/cpumask.h>
  28
  29#include <linux/netfilter/x_tables.h>
  30#include <linux/netfilter_ipv4/ip_tables.h>
  31#include <net/netfilter/nf_log.h>
  32#include "../../netfilter/xt_repldata.h"
  33
  /* Module metadata: license, author and one-line description. */
  34MODULE_LICENSE("GPL");
  35MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
  36MODULE_DESCRIPTION("IPv4 packet filter");
  37
  /*
   * IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG defined this expands to
   * WARN_ON(!(x)); otherwise it expands to nothing, so x is not evaluated
   * at all in non-debug builds.
   */
  38#ifdef CONFIG_NETFILTER_DEBUG
  39#define IP_NF_ASSERT(x)         WARN_ON(!(x))
  40#else
  41#define IP_NF_ASSERT(x)
  42#endif
  43
  /*
   * Return the result of expanding xt_alloc_initial_table(ipt, IPT).
   * Exported (GPL-only) for use by other modules.
   *
   * NOTE(review): 'info' is never named in this body; presumably the
   * xt_alloc_initial_table() macro (see xt_repldata.h) refers to it by
   * name - confirm against that header.  Ownership of the returned
   * buffer is not visible from here.
   */
  44void *ipt_alloc_initial_table(const struct xt_table *info)
  45{
  46        return xt_alloc_initial_table(ipt, IPT);
  47}
  48EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
  49
  50/* Returns whether matches rule or not. */
  51/* Performance critical - called for every packet */
  52static inline bool
  53ip_packet_match(const struct iphdr *ip,
  54                const char *indev,
  55                const char *outdev,
  56                const struct ipt_ip *ipinfo,
  57                int isfrag)
  58{
  59        unsigned long ret;
  60
  61        if (NF_INVF(ipinfo, IPT_INV_SRCIP,
  62                    (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) ||
  63            NF_INVF(ipinfo, IPT_INV_DSTIP,
  64                    (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr))
  65                return false;
  66
  67        ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
  68
  69        if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0))
  70                return false;
  71
  72        ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
  73
  74        if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0))
  75                return false;
  76
  77        /* Check specific protocol */
  78        if (ipinfo->proto &&
  79            NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto))
  80                return false;
  81
  82        /* If we have a fragment rule but the packet is not a fragment
  83         * then we return zero */
  84        if (NF_INVF(ipinfo, IPT_INV_FRAG,
  85                    (ipinfo->flags & IPT_F_FRAG) && !isfrag))
  86                return false;
  87
  88        return true;
  89}
  90
  91static bool
  92ip_checkentry(const struct ipt_ip *ip)
  93{
  94        if (ip->flags & ~IPT_F_MASK)
  95                return false;
  96        if (ip->invflags & ~IPT_INV_MASK)
  97                return false;
  98        return true;
  99}
 100
 101static unsigned int
 102ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
 103{
 104        net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
 105
 106        return NF_DROP;
 107}
 108
 /*
  * Return the entry located @offset bytes past @base.  No bounds or
  * alignment checking is done here.  Performance critical.
  */
static inline struct ipt_entry *
get_entry(const void *base, unsigned int offset)
{
	const char *addr = (const char *)base + offset;

	return (struct ipt_entry *)addr;
}
 115
 116/* All zeroes == unconditional rule. */
 117/* Mildly perf critical (only if packet tracing is on) */
 118static inline bool unconditional(const struct ipt_entry *e)
 119{
 120        static const struct ipt_ip uncond;
 121
 122        return e->target_offset == sizeof(struct ipt_entry) &&
 123               memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
 124}
 125
 /*
  * Const-preserving wrapper around ipt_get_target(): the const is cast
  * away only for the call, and the result is handed back as a pointer
  * to const.
  */
static inline const struct xt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
	struct ipt_entry *entry = (struct ipt_entry *)e;

	return ipt_get_target(entry);
}
 132
 133#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 /* Built-in chain name for each netfilter hook, indexed by hook number. */
 134static const char *const hooknames[] = {
 135        [NF_INET_PRE_ROUTING]           = "PREROUTING",
 136        [NF_INET_LOCAL_IN]              = "INPUT",
 137        [NF_INET_FORWARD]               = "FORWARD",
 138        [NF_INET_LOCAL_OUT]             = "OUTPUT",
 139        [NF_INET_POST_ROUTING]          = "POSTROUTING",
 140};
 141
 /* Indices into comments[] below. */
 142enum nf_ip_trace_comments {
 143        NF_IP_TRACE_COMMENT_RULE,
 144        NF_IP_TRACE_COMMENT_RETURN,
 145        NF_IP_TRACE_COMMENT_POLICY,
 146};
 147
 /* Third field of the "TRACE: table:chain:comment:rulenum" log prefix. */
 148static const char *const comments[] = {
 149        [NF_IP_TRACE_COMMENT_RULE]      = "rule",
 150        [NF_IP_TRACE_COMMENT_RETURN]    = "return",
 151        [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
 152};
 153
 /*
  * Log parameters handed to nf_log_trace() by trace_packet().
  * NOTE(review): level 4 presumably is the syslog "warning" level -
  * confirm against the kernel log level definitions.
  */
 154static struct nf_loginfo trace_loginfo = {
 155        .type = NF_LOG_TYPE_LOG,
 156        .u = {
 157                .log = {
 158                        .level = 4,
 159                        .logflags = NF_LOG_MASK,
 160                },
 161        },
 162};
 163
 164/* Mildly perf critical (only if packet tracing is on) */
 165static inline int
 166get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
 167                      const char *hookname, const char **chainname,
 168                      const char **comment, unsigned int *rulenum)
 169{
 170        const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
 171
 172        if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
 173                /* Head of user chain: ERROR target with chainname */
 174                *chainname = t->target.data;
 175                (*rulenum) = 0;
 176        } else if (s == e) {
 177                (*rulenum)++;
 178
 179                if (unconditional(s) &&
 180                    strcmp(t->target.u.kernel.target->name,
 181                           XT_STANDARD_TARGET) == 0 &&
 182                   t->verdict < 0) {
 183                        /* Tail of chains: STANDARD target (return/policy) */
 184                        *comment = *chainname == hookname
 185                                ? comments[NF_IP_TRACE_COMMENT_POLICY]
 186                                : comments[NF_IP_TRACE_COMMENT_RETURN];
 187                }
 188                return 1;
 189        } else
 190                (*rulenum)++;
 191
 192        return 0;
 193}
 194
 195static void trace_packet(struct net *net,
 196                         const struct sk_buff *skb,
 197                         unsigned int hook,
 198                         const struct net_device *in,
 199                         const struct net_device *out,
 200                         const char *tablename,
 201                         const struct xt_table_info *private,
 202                         const struct ipt_entry *e)
 203{
 204        const struct ipt_entry *root;
 205        const char *hookname, *chainname, *comment;
 206        const struct ipt_entry *iter;
 207        unsigned int rulenum = 0;
 208
 209        root = get_entry(private->entries, private->hook_entry[hook]);
 210
 211        hookname = chainname = hooknames[hook];
 212        comment = comments[NF_IP_TRACE_COMMENT_RULE];
 213
 214        xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
 215                if (get_chainname_rulenum(iter, e, hookname,
 216                    &chainname, &comment, &rulenum) != 0)
 217                        break;
 218
 219        nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
 220                     "TRACE: %s:%s:%s:%u ",
 221                     tablename, chainname, comment, rulenum);
 222}
 223#endif
 224
 225static inline
 226struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 227{
 228        return (void *)entry + entry->next_offset;
 229}
 230
 /*
  * Run a packet through the rules of @table for hook state->hook.
  *
  * @skb:   packet being filtered
  * @state: hook state; supplies the hook number, in/out devices and netns
  * @table: table whose ruleset (table->private) is traversed
  *
  * Traversal starts at private->hook_entry[hook].  For each entry the
  * fixed IPv4 criteria and then every extension match are evaluated; on
  * a full match the entry's per-cpu counters are bumped and its target
  * is executed.  Standard targets (those with no ->target callback) are
  * interpreted inline: a negative verdict other than XT_RETURN ends the
  * walk, XT_RETURN pops the jump stack (or falls to the hook's underflow
  * entry when the stack is empty), and a non-negative verdict is a byte
  * offset to jump to.  The loop ends when a verdict is reached or when
  * acpar.hotdrop is set, in which case NF_DROP is returned.
  *
  * The walk runs with bottom halves disabled and inside an
  * xt_write_recseq_begin()/xt_write_recseq_end() section.
  */
 231/* Returns one of the generic firewall policies, like NF_ACCEPT. */
 232unsigned int
 233ipt_do_table(struct sk_buff *skb,
 234             const struct nf_hook_state *state,
 235             struct xt_table *table)
 236{
 237        unsigned int hook = state->hook;
 238        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 239        const struct iphdr *ip;
 240        /* Initializing verdict to NF_DROP keeps gcc happy. */
 241        unsigned int verdict = NF_DROP;
 242        const char *indev, *outdev;
 243        const void *table_base;
 244        struct ipt_entry *e, **jumpstack;
 245        unsigned int stackidx, cpu;
 246        const struct xt_table_info *private;
 247        struct xt_action_param acpar;
 248        unsigned int addend;
 249
 250        /* Initialization */
 251        stackidx = 0;
 252        ip = ip_hdr(skb);
          /* A missing device is represented by the all-zero nulldevname. */
 253        indev = state->in ? state->in->name : nulldevname;
 254        outdev = state->out ? state->out->name : nulldevname;
 255        /* We handle fragments by dealing with the first fragment as
 256         * if it was a normal packet.  All other fragments are treated
 257         * normally, except that they will NEVER match rules that ask
 258         * things we don't know, ie. tcp syn flag or ports).  If the
 259         * rule is also a fragment-specific rule, non-fragments won't
 260         * match it. */
 261        acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 262        acpar.thoff   = ip_hdrlen(skb);
 263        acpar.hotdrop = false;
 264        acpar.net     = state->net;
 265        acpar.in      = state->in;
 266        acpar.out     = state->out;
 267        acpar.family  = NFPROTO_IPV4;
 268        acpar.hooknum = hook;
 269
 270        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 271        local_bh_disable();
 272        addend = xt_write_recseq_begin();
 273        private = table->private;
 274        cpu        = smp_processor_id();
 275        /*
 276         * Ensure we load private-> members after we've fetched the base
 277         * pointer.
 278         */
 279        smp_read_barrier_depends();
 280        table_base = private->entries;
 281        jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
 282
 283        /* Switch to alternate jumpstack if we're being invoked via TEE.
 284         * TEE issues XT_CONTINUE verdict on original skb so we must not
 285         * clobber the jumpstack.
 286         *
 287         * For recursion via REJECT or SYNPROXY the stack will be clobbered
 288         * but it is no problem since absolute verdict is issued by these.
 289         */
 290        if (static_key_false(&xt_tee_enabled))
 291                jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 292
 293        e = get_entry(table_base, private->hook_entry[hook]);
 294
 295        do {
 296                const struct xt_entry_target *t;
 297                const struct xt_entry_match *ematch;
 298                struct xt_counters *counter;
 299
 300                IP_NF_ASSERT(e);
 301                if (!ip_packet_match(ip, indev, outdev,
 302                    &e->ip, acpar.fragoff)) {
                  /* Also the goto target when an extension match below fails. */
 303 no_match:
 304                        e = ipt_next_entry(e);
 305                        continue;
 306                }
 307
 308                xt_ematch_foreach(ematch, e) {
 309                        acpar.match     = ematch->u.kernel.match;
 310                        acpar.matchinfo = ematch->data;
 311                        if (!acpar.match->match(skb, &acpar))
 312                                goto no_match;
 313                }
 314
                  /* Rule matched: account skb->len bytes and one packet. */
 315                counter = xt_get_this_cpu_counter(&e->counters);
 316                ADD_COUNTER(*counter, skb->len, 1);
 317
 318                t = ipt_get_target(e);
 319                IP_NF_ASSERT(t->u.kernel.target);
 320
 321#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 322                /* The packet is traced: log it */
 323                if (unlikely(skb->nf_trace))
 324                        trace_packet(state->net, skb, hook, state->in,
 325                                     state->out, table->name, private, e);
 326#endif
 327                /* Standard target? */
 328                if (!t->u.kernel.target->target) {
 329                        int v;
 330
 331                        v = ((struct xt_standard_target *)t)->verdict;
 332                        if (v < 0) {
 333                                /* Pop from stack? */
 334                                if (v != XT_RETURN) {
                                          /* Negative verdicts encode the final verdict as -v - 1. */
 335                                        verdict = (unsigned int)(-v) - 1;
 336                                        break;
 337                                }
                                  /* RETURN with an empty stack: continue at the hook's underflow entry. */
 338                                if (stackidx == 0) {
 339                                        e = get_entry(table_base,
 340                                            private->underflow[hook]);
 341                                } else {
 342                                        e = jumpstack[--stackidx];
 343                                        e = ipt_next_entry(e);
 344                                }
 345                                continue;
 346                        }
                          /* Remember where to return to, unless the jump just falls
                           * through to the next entry or the rule is flagged IPT_F_GOTO. */
 347                        if (table_base + v != ipt_next_entry(e) &&
 348                            !(e->ip.flags & IPT_F_GOTO))
 349                                jumpstack[stackidx++] = e;
 350
 351                        e = get_entry(table_base, v);
 352                        continue;
 353                }
 354
 355                acpar.target   = t->u.kernel.target;
 356                acpar.targinfo = t->data;
 357
 358                verdict = t->u.kernel.target->target(skb, &acpar);
 359                /* Target might have changed stuff. */
 360                ip = ip_hdr(skb);
 361                if (verdict == XT_CONTINUE)
 362                        e = ipt_next_entry(e);
 363                else
 364                        /* Verdict */
 365                        break;
 366        } while (!acpar.hotdrop);
 367
 368        xt_write_recseq_end(addend);
 369        local_bh_enable();
 370
          /* hotdrop overrides whatever verdict was computed. */
 371        if (acpar.hotdrop)
 372                return NF_DROP;
 373        else return verdict;
 374}
 375
 /*
  * Walk the ruleset from every valid hook entry point and record in each
  * entry's comefrom field the set of hooks it is reachable from.
  *
  * @newinfo:     table info; hook_entry[], size and number are read
  * @valid_hooks: bitmask of hooks to start from
  * @entry0:      base address of the entry blob
  * @offsets:     entry offsets, passed to xt_find_jump_offset() to
  *               validate jump destinations
  *
  * During the walk counters.pcnt of each entry temporarily holds the
  * position we came from (a back pointer), and bit NF_INET_NUMHOOKS of
  * comefrom marks entries on the path currently being explored.
  *
  * Returns 1 on success.  Returns 0 when an entry on the current path is
  * reached again (a loop), when a standard verdict is below
  * -NF_MAX_VERDICT - 1, when xt_find_jump_offset() rejects a jump, or
  * when the next position would be at or beyond newinfo->size.
  */
 376/* Figures out from what hook each rule can be called: returns 0 if
 377   there are loops.  Puts hook bitmask in comefrom. */
 378static int
 379mark_source_chains(const struct xt_table_info *newinfo,
 380                   unsigned int valid_hooks, void *entry0,
 381                   unsigned int *offsets)
 382{
 383        unsigned int hook;
 384
 385        /* No recursion; use packet counter to save back ptrs (reset
 386           to 0 as we leave), and comefrom to save source hook bitmask */
 387        for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 388                unsigned int pos = newinfo->hook_entry[hook];
 389                struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
 390
 391                if (!(valid_hooks & (1 << hook)))
 392                        continue;
 393
 394                /* Set initial back pointer. */
 395                e->counters.pcnt = pos;
 396
 397                for (;;) {
 398                        const struct xt_standard_target *t
 399                                = (void *)ipt_get_target_c(e);
 400                        int visited = e->comefrom & (1 << hook);
 401
                          /* Entry is already on the path being explored: loop. */
 402                        if (e->comefrom & (1 << NF_INET_NUMHOOKS))
 403                                return 0;
 404
 405                        e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 406
 407                        /* Unconditional return/END. */
 408                        if ((unconditional(e) &&
 409                             (strcmp(t->target.u.user.name,
 410                                     XT_STANDARD_TARGET) == 0) &&
 411                             t->verdict < 0) || visited) {
 412                                unsigned int oldpos, size;
 413
                                  /* Reject standard verdicts below the valid range. */
 414                                if ((strcmp(t->target.u.user.name,
 415                                            XT_STANDARD_TARGET) == 0) &&
 416                                    t->verdict < -NF_MAX_VERDICT - 1)
 417                                        return 0;
 418
 419                                /* Return: backtrack through the last
 420                                   big jump. */
 421                                do {
                                          /* Leaving this entry: clear its on-path bit. */
 422                                        e->comefrom ^= (1<<NF_INET_NUMHOOKS);
 423                                        oldpos = pos;
 424                                        pos = e->counters.pcnt;
 425                                        e->counters.pcnt = 0;
 426
 427                                        /* We're at the start. */
 428                                        if (pos == oldpos)
 429                                                goto next;
 430
 431                                        e = (struct ipt_entry *)
 432                                                (entry0 + pos);
                                  /* Keep unwinding while we arrived by plain fall-through. */
 433                                } while (oldpos == pos + e->next_offset);
 434
 435                                /* Move along one */
 436                                size = e->next_offset;
 437                                e = (struct ipt_entry *)
 438                                        (entry0 + pos + size);
 439                                if (pos + size >= newinfo->size)
 440                                        return 0;
 441                                e->counters.pcnt = pos;
 442                                pos += size;
 443                        } else {
 444                                int newpos = t->verdict;
 445
 446                                if (strcmp(t->target.u.user.name,
 447                                           XT_STANDARD_TARGET) == 0 &&
 448                                    newpos >= 0) {
 449                                        /* This a jump; chase it. */
 450                                        if (!xt_find_jump_offset(offsets, newpos,
 451                                                                 newinfo->number))
 452                                                return 0;
                                          /* NOTE(review): the same assignment is repeated
                                           * unconditionally after this if/else. */
 453                                        e = (struct ipt_entry *)
 454                                                (entry0 + newpos);
 455                                } else {
 456                                        /* ... this is a fallthru */
 457                                        newpos = pos + e->next_offset;
 458                                        if (newpos >= newinfo->size)
 459                                                return 0;
 460                                }
 461                                e = (struct ipt_entry *)
 462                                        (entry0 + newpos);
                                  /* Store the back pointer in the entry we move to. */
 463                                e->counters.pcnt = pos;
 464                                pos = newpos;
 465                        }
 466                }
 467next:           ;
 468        }
 469        return 1;
 470}
 471
 472static void cleanup_match(struct xt_entry_match *m, struct net *net)
 473{
 474        struct xt_mtdtor_param par;
 475
 476        par.net       = net;
 477        par.match     = m->u.kernel.match;
 478        par.matchinfo = m->data;
 479        par.family    = NFPROTO_IPV4;
 480        if (par.match->destroy != NULL)
 481                par.match->destroy(&par);
 482        module_put(par.match->me);
 483}
 484
 485static int
 486check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
 487{
 488        const struct ipt_ip *ip = par->entryinfo;
 489
 490        par->match     = m->u.kernel.match;
 491        par->matchinfo = m->data;
 492
 493        return xt_check_match(par, m->u.match_size - sizeof(*m),
 494                              ip->proto, ip->invflags & IPT_INV_PROTO);
 495}
 496
 497static int
 498find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
 499{
 500        struct xt_match *match;
 501        int ret;
 502
 503        match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
 504                                      m->u.user.revision);
 505        if (IS_ERR(match))
 506                return PTR_ERR(match);
 507        m->u.kernel.match = match;
 508
 509        ret = check_match(m, par);
 510        if (ret)
 511                goto err;
 512
 513        return 0;
 514err:
 515        module_put(m->u.kernel.match->me);
 516        return ret;
 517}
 518
 519static int check_target(struct ipt_entry *e, struct net *net, const char *name)
 520{
 521        struct xt_entry_target *t = ipt_get_target(e);
 522        struct xt_tgchk_param par = {
 523                .net       = net,
 524                .table     = name,
 525                .entryinfo = e,
 526                .target    = t->u.kernel.target,
 527                .targinfo  = t->data,
 528                .hook_mask = e->comefrom,
 529                .family    = NFPROTO_IPV4,
 530        };
 531
 532        return xt_check_target(&par, t->u.target_size - sizeof(*t),
 533                               e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
 534}
 535
 536static int
 537find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 538                 unsigned int size)
 539{
 540        struct xt_entry_target *t;
 541        struct xt_target *target;
 542        int ret;
 543        unsigned int j;
 544        struct xt_mtchk_param mtpar;
 545        struct xt_entry_match *ematch;
 546        unsigned long pcnt;
 547
 548        pcnt = xt_percpu_counter_alloc();
 549        if (IS_ERR_VALUE(pcnt))
 550                return -ENOMEM;
 551        e->counters.pcnt = pcnt;
 552
 553        j = 0;
 554        mtpar.net       = net;
 555        mtpar.table     = name;
 556        mtpar.entryinfo = &e->ip;
 557        mtpar.hook_mask = e->comefrom;
 558        mtpar.family    = NFPROTO_IPV4;
 559        xt_ematch_foreach(ematch, e) {
 560                ret = find_check_match(ematch, &mtpar);
 561                if (ret != 0)
 562                        goto cleanup_matches;
 563                ++j;
 564        }
 565
 566        t = ipt_get_target(e);
 567        target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
 568                                        t->u.user.revision);
 569        if (IS_ERR(target)) {
 570                ret = PTR_ERR(target);
 571                goto cleanup_matches;
 572        }
 573        t->u.kernel.target = target;
 574
 575        ret = check_target(e, net, name);
 576        if (ret)
 577                goto err;
 578
 579        return 0;
 580 err:
 581        module_put(t->u.kernel.target->me);
 582 cleanup_matches:
 583        xt_ematch_foreach(ematch, e) {
 584                if (j-- == 0)
 585                        break;
 586                cleanup_match(ematch, net);
 587        }
 588
 589        xt_percpu_counter_free(e->counters.pcnt);
 590
 591        return ret;
 592}
 593
 594static bool check_underflow(const struct ipt_entry *e)
 595{
 596        const struct xt_entry_target *t;
 597        unsigned int verdict;
 598
 599        if (!unconditional(e))
 600                return false;
 601        t = ipt_get_target_c(e);
 602        if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
 603                return false;
 604        verdict = ((struct xt_standard_target *)t)->verdict;
 605        verdict = -verdict - 1;
 606        return verdict == NF_DROP || verdict == NF_ACCEPT;
 607}
 608
 609static int
 610check_entry_size_and_hooks(struct ipt_entry *e,
 611                           struct xt_table_info *newinfo,
 612                           const unsigned char *base,
 613                           const unsigned char *limit,
 614                           const unsigned int *hook_entries,
 615                           const unsigned int *underflows,
 616                           unsigned int valid_hooks)
 617{
 618        unsigned int h;
 619        int err;
 620
 621        if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
 622            (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
 623            (unsigned char *)e + e->next_offset > limit)
 624                return -EINVAL;
 625
 626        if (e->next_offset
 627            < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
 628                return -EINVAL;
 629
 630        if (!ip_checkentry(&e->ip))
 631                return -EINVAL;
 632
 633        err = xt_check_entry_offsets(e, e->elems, e->target_offset,
 634                                     e->next_offset);
 635        if (err)
 636                return err;
 637
 638        /* Check hooks & underflows */
 639        for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 640                if (!(valid_hooks & (1 << h)))
 641                        continue;
 642                if ((unsigned char *)e - base == hook_entries[h])
 643                        newinfo->hook_entry[h] = hook_entries[h];
 644                if ((unsigned char *)e - base == underflows[h]) {
 645                        if (!check_underflow(e))
 646                                return -EINVAL;
 647
 648                        newinfo->underflow[h] = underflows[h];
 649                }
 650        }
 651
 652        /* Clear counters and comefrom */
 653        e->counters = ((struct xt_counters) { 0, 0 });
 654        e->comefrom = 0;
 655        return 0;
 656}
 657
 658static void
 659cleanup_entry(struct ipt_entry *e, struct net *net)
 660{
 661        struct xt_tgdtor_param par;
 662        struct xt_entry_target *t;
 663        struct xt_entry_match *ematch;
 664
 665        /* Cleanup all matches */
 666        xt_ematch_foreach(ematch, e)
 667                cleanup_match(ematch, net);
 668        t = ipt_get_target(e);
 669
 670        par.net      = net;
 671        par.target   = t->u.kernel.target;
 672        par.targinfo = t->data;
 673        par.family   = NFPROTO_IPV4;
 674        if (par.target->destroy != NULL)
 675                par.target->destroy(&par);
 676        module_put(par.target->me);
 677        xt_percpu_counter_free(e->counters.pcnt);
 678}
 679
 680/* Checks and translates the user-supplied table segment (held in
 681   newinfo) */
 682static int
 683translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 684                const struct ipt_replace *repl)
 685{
 686        struct ipt_entry *iter;
 687        unsigned int *offsets;
 688        unsigned int i;
 689        int ret = 0;
 690
 691        newinfo->size = repl->size;
 692        newinfo->number = repl->num_entries;
 693
 694        /* Init all hooks to impossible value. */
 695        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 696                newinfo->hook_entry[i] = 0xFFFFFFFF;
 697                newinfo->underflow[i] = 0xFFFFFFFF;
 698        }
 699
 700        offsets = xt_alloc_entry_offsets(newinfo->number);
 701        if (!offsets)
 702                return -ENOMEM;
 703        i = 0;
 704        /* Walk through entries, checking offsets. */
 705        xt_entry_foreach(iter, entry0, newinfo->size) {
 706                ret = check_entry_size_and_hooks(iter, newinfo, entry0,
 707                                                 entry0 + repl->size,
 708                                                 repl->hook_entry,
 709                                                 repl->underflow,
 710                                                 repl->valid_hooks);
 711                if (ret != 0)
 712                        goto out_free;
 713                if (i < repl->num_entries)
 714                        offsets[i] = (void *)iter - entry0;
 715                ++i;
 716                if (strcmp(ipt_get_target(iter)->u.user.name,
 717                    XT_ERROR_TARGET) == 0)
 718                        ++newinfo->stacksize;
 719        }
 720
 721        ret = -EINVAL;
 722        if (i != repl->num_entries)
 723                goto out_free;
 724
 725        /* Check hooks all assigned */
 726        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 727                /* Only hooks which are valid */
 728                if (!(repl->valid_hooks & (1 << i)))
 729                        continue;
 730                if (newinfo->hook_entry[i] == 0xFFFFFFFF)
 731                        goto out_free;
 732                if (newinfo->underflow[i] == 0xFFFFFFFF)
 733                        goto out_free;
 734        }
 735
 736        if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
 737                ret = -ELOOP;
 738                goto out_free;
 739        }
 740        kvfree(offsets);
 741
 742        /* Finally, each sanity check must pass */
 743        i = 0;
 744        xt_entry_foreach(iter, entry0, newinfo->size) {
 745                ret = find_check_entry(iter, net, repl->name, repl->size);
 746                if (ret != 0)
 747                        break;
 748                ++i;
 749        }
 750
 751        if (ret != 0) {
 752                xt_entry_foreach(iter, entry0, newinfo->size) {
 753                        if (i-- == 0)
 754                                break;
 755                        cleanup_entry(iter, net);
 756                }
 757                return ret;
 758        }
 759
 760        return ret;
 761 out_free:
 762        kvfree(offsets);
 763        return ret;
 764}
 765
 766static void
 767get_counters(const struct xt_table_info *t,
 768             struct xt_counters counters[])
 769{
 770        struct ipt_entry *iter;
 771        unsigned int cpu;
 772        unsigned int i;
 773
 774        for_each_possible_cpu(cpu) {
 775                seqcount_t *s = &per_cpu(xt_recseq, cpu);
 776
 777                i = 0;
 778                xt_entry_foreach(iter, t->entries, t->size) {
 779                        struct xt_counters *tmp;
 780                        u64 bcnt, pcnt;
 781                        unsigned int start;
 782
 783                        tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
 784                        do {
 785                                start = read_seqcount_begin(s);
 786                                bcnt = tmp->bcnt;
 787                                pcnt = tmp->pcnt;
 788                        } while (read_seqcount_retry(s, start));
 789
 790                        ADD_COUNTER(counters[i], bcnt, pcnt);
 791                        ++i; /* macro does multi eval of i */
 792                }
 793        }
 794}
 795
 796static struct xt_counters *alloc_counters(const struct xt_table *table)
 797{
 798        unsigned int countersize;
 799        struct xt_counters *counters;
 800        const struct xt_table_info *private = table->private;
 801
 802        /* We need atomic snapshot of counters: rest doesn't change
 803           (other than comefrom, which userspace doesn't care
 804           about). */
 805        countersize = sizeof(struct xt_counters) * private->number;
 806        counters = vzalloc(countersize);
 807
 808        if (counters == NULL)
 809                return ERR_PTR(-ENOMEM);
 810
 811        get_counters(private, counters);
 812
 813        return counters;
 814}
 815
 816static int
 817copy_entries_to_user(unsigned int total_size,
 818                     const struct xt_table *table,
 819                     void __user *userptr)
 820{
 821        unsigned int off, num;
 822        const struct ipt_entry *e;
 823        struct xt_counters *counters;
 824        const struct xt_table_info *private = table->private;
 825        int ret = 0;
 826        const void *loc_cpu_entry;
 827
 828        counters = alloc_counters(table);
 829        if (IS_ERR(counters))
 830                return PTR_ERR(counters);
 831
 832        loc_cpu_entry = private->entries;
 833        if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 834                ret = -EFAULT;
 835                goto free_counters;
 836        }
 837
 838        /* FIXME: use iterator macros --RR */
 839        /* ... then go back and fix counters and names */
 840        for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
 841                unsigned int i;
 842                const struct xt_entry_match *m;
 843                const struct xt_entry_target *t;
 844
 845                e = (struct ipt_entry *)(loc_cpu_entry + off);
 846                if (copy_to_user(userptr + off
 847                                 + offsetof(struct ipt_entry, counters),
 848                                 &counters[num],
 849                                 sizeof(counters[num])) != 0) {
 850                        ret = -EFAULT;
 851                        goto free_counters;
 852                }
 853
 854                for (i = sizeof(struct ipt_entry);
 855                     i < e->target_offset;
 856                     i += m->u.match_size) {
 857                        m = (void *)e + i;
 858
 859                        if (copy_to_user(userptr + off + i
 860                                         + offsetof(struct xt_entry_match,
 861                                                    u.user.name),
 862                                         m->u.kernel.match->name,
 863                                         strlen(m->u.kernel.match->name)+1)
 864                            != 0) {
 865                                ret = -EFAULT;
 866                                goto free_counters;
 867                        }
 868                }
 869
 870                t = ipt_get_target_c(e);
 871                if (copy_to_user(userptr + off + e->target_offset
 872                                 + offsetof(struct xt_entry_target,
 873                                            u.user.name),
 874                                 t->u.kernel.target->name,
 875                                 strlen(t->u.kernel.target->name)+1) != 0) {
 876                        ret = -EFAULT;
 877                        goto free_counters;
 878                }
 879        }
 880
 881 free_counters:
 882        vfree(counters);
 883        return ret;
 884}
 885
 886#ifdef CONFIG_COMPAT
 887static void compat_standard_from_user(void *dst, const void *src)
 888{
 889        int v = *(compat_int_t *)src;
 890
 891        if (v > 0)
 892                v += xt_compat_calc_jump(AF_INET, v);
 893        memcpy(dst, &v, sizeof(v));
 894}
 895
 896static int compat_standard_to_user(void __user *dst, const void *src)
 897{
 898        compat_int_t cv = *(int *)src;
 899
 900        if (cv > 0)
 901                cv -= xt_compat_calc_jump(AF_INET, cv);
 902        return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
 903}
 904
 905static int compat_calc_entry(const struct ipt_entry *e,
 906                             const struct xt_table_info *info,
 907                             const void *base, struct xt_table_info *newinfo)
 908{
 909        const struct xt_entry_match *ematch;
 910        const struct xt_entry_target *t;
 911        unsigned int entry_offset;
 912        int off, i, ret;
 913
 914        off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 915        entry_offset = (void *)e - base;
 916        xt_ematch_foreach(ematch, e)
 917                off += xt_compat_match_offset(ematch->u.kernel.match);
 918        t = ipt_get_target_c(e);
 919        off += xt_compat_target_offset(t->u.kernel.target);
 920        newinfo->size -= off;
 921        ret = xt_compat_add_offset(AF_INET, entry_offset, off);
 922        if (ret)
 923                return ret;
 924
 925        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 926                if (info->hook_entry[i] &&
 927                    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
 928                        newinfo->hook_entry[i] -= off;
 929                if (info->underflow[i] &&
 930                    (e < (struct ipt_entry *)(base + info->underflow[i])))
 931                        newinfo->underflow[i] -= off;
 932        }
 933        return 0;
 934}
 935
 936static int compat_table_info(const struct xt_table_info *info,
 937                             struct xt_table_info *newinfo)
 938{
 939        struct ipt_entry *iter;
 940        const void *loc_cpu_entry;
 941        int ret;
 942
 943        if (!newinfo || !info)
 944                return -EINVAL;
 945
 946        /* we dont care about newinfo->entries */
 947        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 948        newinfo->initial_entries = 0;
 949        loc_cpu_entry = info->entries;
 950        xt_compat_init_offsets(AF_INET, info->number);
 951        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 952                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 953                if (ret != 0)
 954                        return ret;
 955        }
 956        return 0;
 957}
 958#endif
 959
 960static int get_info(struct net *net, void __user *user,
 961                    const int *len, int compat)
 962{
 963        char name[XT_TABLE_MAXNAMELEN];
 964        struct xt_table *t;
 965        int ret;
 966
 967        if (*len != sizeof(struct ipt_getinfo))
 968                return -EINVAL;
 969
 970        if (copy_from_user(name, user, sizeof(name)) != 0)
 971                return -EFAULT;
 972
 973        name[XT_TABLE_MAXNAMELEN-1] = '\0';
 974#ifdef CONFIG_COMPAT
 975        if (compat)
 976                xt_compat_lock(AF_INET);
 977#endif
 978        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
 979                                    "iptable_%s", name);
 980        if (!IS_ERR_OR_NULL(t)) {
 981                struct ipt_getinfo info;
 982                const struct xt_table_info *private = t->private;
 983#ifdef CONFIG_COMPAT
 984                struct xt_table_info tmp;
 985
 986                if (compat) {
 987                        ret = compat_table_info(private, &tmp);
 988                        xt_compat_flush_offsets(AF_INET);
 989                        private = &tmp;
 990                }
 991#endif
 992                memset(&info, 0, sizeof(info));
 993                info.valid_hooks = t->valid_hooks;
 994                memcpy(info.hook_entry, private->hook_entry,
 995                       sizeof(info.hook_entry));
 996                memcpy(info.underflow, private->underflow,
 997                       sizeof(info.underflow));
 998                info.num_entries = private->number;
 999                info.size = private->size;
1000                strcpy(info.name, name);
1001
1002                if (copy_to_user(user, &info, *len) != 0)
1003                        ret = -EFAULT;
1004                else
1005                        ret = 0;
1006
1007                xt_table_unlock(t);
1008                module_put(t->me);
1009        } else
1010                ret = t ? PTR_ERR(t) : -ENOENT;
1011#ifdef CONFIG_COMPAT
1012        if (compat)
1013                xt_compat_unlock(AF_INET);
1014#endif
1015        return ret;
1016}
1017
1018static int
1019get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1020            const int *len)
1021{
1022        int ret;
1023        struct ipt_get_entries get;
1024        struct xt_table *t;
1025
1026        if (*len < sizeof(get))
1027                return -EINVAL;
1028        if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1029                return -EFAULT;
1030        if (*len != sizeof(struct ipt_get_entries) + get.size)
1031                return -EINVAL;
1032        get.name[sizeof(get.name) - 1] = '\0';
1033
1034        t = xt_find_table_lock(net, AF_INET, get.name);
1035        if (!IS_ERR_OR_NULL(t)) {
1036                const struct xt_table_info *private = t->private;
1037                if (get.size == private->size)
1038                        ret = copy_entries_to_user(private->size,
1039                                                   t, uptr->entrytable);
1040                else
1041                        ret = -EAGAIN;
1042
1043                module_put(t->me);
1044                xt_table_unlock(t);
1045        } else
1046                ret = t ? PTR_ERR(t) : -ENOENT;
1047
1048        return ret;
1049}
1050
1051static int
1052__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1053             struct xt_table_info *newinfo, unsigned int num_counters,
1054             void __user *counters_ptr)
1055{
1056        int ret;
1057        struct xt_table *t;
1058        struct xt_table_info *oldinfo;
1059        struct xt_counters *counters;
1060        struct ipt_entry *iter;
1061
1062        ret = 0;
1063        counters = vzalloc(num_counters * sizeof(struct xt_counters));
1064        if (!counters) {
1065                ret = -ENOMEM;
1066                goto out;
1067        }
1068
1069        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1070                                    "iptable_%s", name);
1071        if (IS_ERR_OR_NULL(t)) {
1072                ret = t ? PTR_ERR(t) : -ENOENT;
1073                goto free_newinfo_counters_untrans;
1074        }
1075
1076        /* You lied! */
1077        if (valid_hooks != t->valid_hooks) {
1078                ret = -EINVAL;
1079                goto put_module;
1080        }
1081
1082        oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1083        if (!oldinfo)
1084                goto put_module;
1085
1086        /* Update module usage count based on number of rules */
1087        if ((oldinfo->number > oldinfo->initial_entries) ||
1088            (newinfo->number <= oldinfo->initial_entries))
1089                module_put(t->me);
1090        if ((oldinfo->number > oldinfo->initial_entries) &&
1091            (newinfo->number <= oldinfo->initial_entries))
1092                module_put(t->me);
1093
1094        /* Get the old counters, and synchronize with replace */
1095        get_counters(oldinfo, counters);
1096
1097        /* Decrease module usage counts and free resource */
1098        xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
1099                cleanup_entry(iter, net);
1100
1101        xt_free_table_info(oldinfo);
1102        if (copy_to_user(counters_ptr, counters,
1103                         sizeof(struct xt_counters) * num_counters) != 0) {
1104                /* Silent error, can't fail, new table is already in place */
1105                net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
1106        }
1107        vfree(counters);
1108        xt_table_unlock(t);
1109        return ret;
1110
1111 put_module:
1112        module_put(t->me);
1113        xt_table_unlock(t);
1114 free_newinfo_counters_untrans:
1115        vfree(counters);
1116 out:
1117        return ret;
1118}
1119
1120static int
1121do_replace(struct net *net, const void __user *user, unsigned int len)
1122{
1123        int ret;
1124        struct ipt_replace tmp;
1125        struct xt_table_info *newinfo;
1126        void *loc_cpu_entry;
1127        struct ipt_entry *iter;
1128
1129        if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1130                return -EFAULT;
1131
1132        /* overflow check */
1133        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1134                return -ENOMEM;
1135        if (tmp.num_counters == 0)
1136                return -EINVAL;
1137
1138        tmp.name[sizeof(tmp.name)-1] = 0;
1139
1140        newinfo = xt_alloc_table_info(tmp.size);
1141        if (!newinfo)
1142                return -ENOMEM;
1143
1144        loc_cpu_entry = newinfo->entries;
1145        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1146                           tmp.size) != 0) {
1147                ret = -EFAULT;
1148                goto free_newinfo;
1149        }
1150
1151        ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
1152        if (ret != 0)
1153                goto free_newinfo;
1154
1155        ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1156                           tmp.num_counters, tmp.counters);
1157        if (ret)
1158                goto free_newinfo_untrans;
1159        return 0;
1160
1161 free_newinfo_untrans:
1162        xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1163                cleanup_entry(iter, net);
1164 free_newinfo:
1165        xt_free_table_info(newinfo);
1166        return ret;
1167}
1168
1169static int
1170do_add_counters(struct net *net, const void __user *user,
1171                unsigned int len, int compat)
1172{
1173        unsigned int i;
1174        struct xt_counters_info tmp;
1175        struct xt_counters *paddc;
1176        struct xt_table *t;
1177        const struct xt_table_info *private;
1178        int ret = 0;
1179        struct ipt_entry *iter;
1180        unsigned int addend;
1181
1182        paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
1183        if (IS_ERR(paddc))
1184                return PTR_ERR(paddc);
1185
1186        t = xt_find_table_lock(net, AF_INET, tmp.name);
1187        if (IS_ERR_OR_NULL(t)) {
1188                ret = t ? PTR_ERR(t) : -ENOENT;
1189                goto free;
1190        }
1191
1192        local_bh_disable();
1193        private = t->private;
1194        if (private->number != tmp.num_counters) {
1195                ret = -EINVAL;
1196                goto unlock_up_free;
1197        }
1198
1199        i = 0;
1200        addend = xt_write_recseq_begin();
1201        xt_entry_foreach(iter, private->entries, private->size) {
1202                struct xt_counters *tmp;
1203
1204                tmp = xt_get_this_cpu_counter(&iter->counters);
1205                ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
1206                ++i;
1207        }
1208        xt_write_recseq_end(addend);
1209 unlock_up_free:
1210        local_bh_enable();
1211        xt_table_unlock(t);
1212        module_put(t->me);
1213 free:
1214        vfree(paddc);
1215
1216        return ret;
1217}
1218
1219#ifdef CONFIG_COMPAT
/*
 * Header of a "replace table" request as read from userspace by
 * compat_do_replace(); the rule blob follows directly after it.
 */
struct compat_ipt_replace {
        /* Table to replace; NUL-terminated by the kernel after the copy. */
        char                    name[XT_TABLE_MAXNAMELEN];
        /* Must match the valid_hooks of the table being replaced. */
        u32                     valid_hooks;
        /* Number of entries in the blob; checked while walking it. */
        u32                     num_entries;
        /* Size in bytes of the entry blob that follows the header. */
        u32                     size;
        /* Per-hook offsets into the blob (in the compat layout). */
        u32                     hook_entry[NF_INET_NUMHOOKS];
        u32                     underflow[NF_INET_NUMHOOKS];
        /* Number of counter slots at @counters; must be non-zero. */
        u32                     num_counters;
        /* User buffer receiving the old counters (used via compat_ptr()). */
        compat_uptr_t           counters;       /* struct xt_counters * */
        /* Start of the entry blob. */
        struct compat_ipt_entry entries[0];
};
1231
1232static int
1233compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1234                          unsigned int *size, struct xt_counters *counters,
1235                          unsigned int i)
1236{
1237        struct xt_entry_target *t;
1238        struct compat_ipt_entry __user *ce;
1239        u_int16_t target_offset, next_offset;
1240        compat_uint_t origsize;
1241        const struct xt_entry_match *ematch;
1242        int ret = 0;
1243
1244        origsize = *size;
1245        ce = (struct compat_ipt_entry __user *)*dstptr;
1246        if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
1247            copy_to_user(&ce->counters, &counters[i],
1248            sizeof(counters[i])) != 0)
1249                return -EFAULT;
1250
1251        *dstptr += sizeof(struct compat_ipt_entry);
1252        *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1253
1254        xt_ematch_foreach(ematch, e) {
1255                ret = xt_compat_match_to_user(ematch, dstptr, size);
1256                if (ret != 0)
1257                        return ret;
1258        }
1259        target_offset = e->target_offset - (origsize - *size);
1260        t = ipt_get_target(e);
1261        ret = xt_compat_target_to_user(t, dstptr, size);
1262        if (ret)
1263                return ret;
1264        next_offset = e->next_offset - (origsize - *size);
1265        if (put_user(target_offset, &ce->target_offset) != 0 ||
1266            put_user(next_offset, &ce->next_offset) != 0)
1267                return -EFAULT;
1268        return 0;
1269}
1270
1271static int
1272compat_find_calc_match(struct xt_entry_match *m,
1273                       const struct ipt_ip *ip,
1274                       int *size)
1275{
1276        struct xt_match *match;
1277
1278        match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
1279                                      m->u.user.revision);
1280        if (IS_ERR(match))
1281                return PTR_ERR(match);
1282
1283        m->u.kernel.match = match;
1284        *size += xt_compat_match_offset(match);
1285        return 0;
1286}
1287
1288static void compat_release_entry(struct compat_ipt_entry *e)
1289{
1290        struct xt_entry_target *t;
1291        struct xt_entry_match *ematch;
1292
1293        /* Cleanup all matches */
1294        xt_ematch_foreach(ematch, e)
1295                module_put(ematch->u.kernel.match->me);
1296        t = compat_ipt_get_target(e);
1297        module_put(t->u.kernel.target->me);
1298}
1299
1300static int
1301check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1302                                  struct xt_table_info *newinfo,
1303                                  unsigned int *size,
1304                                  const unsigned char *base,
1305                                  const unsigned char *limit)
1306{
1307        struct xt_entry_match *ematch;
1308        struct xt_entry_target *t;
1309        struct xt_target *target;
1310        unsigned int entry_offset;
1311        unsigned int j;
1312        int ret, off;
1313
1314        if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
1315            (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
1316            (unsigned char *)e + e->next_offset > limit)
1317                return -EINVAL;
1318
1319        if (e->next_offset < sizeof(struct compat_ipt_entry) +
1320                             sizeof(struct compat_xt_entry_target))
1321                return -EINVAL;
1322
1323        if (!ip_checkentry(&e->ip))
1324                return -EINVAL;
1325
1326        ret = xt_compat_check_entry_offsets(e, e->elems,
1327                                            e->target_offset, e->next_offset);
1328        if (ret)
1329                return ret;
1330
1331        off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1332        entry_offset = (void *)e - (void *)base;
1333        j = 0;
1334        xt_ematch_foreach(ematch, e) {
1335                ret = compat_find_calc_match(ematch, &e->ip, &off);
1336                if (ret != 0)
1337                        goto release_matches;
1338                ++j;
1339        }
1340
1341        t = compat_ipt_get_target(e);
1342        target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
1343                                        t->u.user.revision);
1344        if (IS_ERR(target)) {
1345                ret = PTR_ERR(target);
1346                goto release_matches;
1347        }
1348        t->u.kernel.target = target;
1349
1350        off += xt_compat_target_offset(target);
1351        *size += off;
1352        ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1353        if (ret)
1354                goto out;
1355
1356        return 0;
1357
1358out:
1359        module_put(t->u.kernel.target->me);
1360release_matches:
1361        xt_ematch_foreach(ematch, e) {
1362                if (j-- == 0)
1363                        break;
1364                module_put(ematch->u.kernel.match->me);
1365        }
1366        return ret;
1367}
1368
1369static void
1370compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1371                            unsigned int *size,
1372                            struct xt_table_info *newinfo, unsigned char *base)
1373{
1374        struct xt_entry_target *t;
1375        struct xt_target *target;
1376        struct ipt_entry *de;
1377        unsigned int origsize;
1378        int h;
1379        struct xt_entry_match *ematch;
1380
1381        origsize = *size;
1382        de = (struct ipt_entry *)*dstptr;
1383        memcpy(de, e, sizeof(struct ipt_entry));
1384        memcpy(&de->counters, &e->counters, sizeof(e->counters));
1385
1386        *dstptr += sizeof(struct ipt_entry);
1387        *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1388
1389        xt_ematch_foreach(ematch, e)
1390                xt_compat_match_from_user(ematch, dstptr, size);
1391
1392        de->target_offset = e->target_offset - (origsize - *size);
1393        t = compat_ipt_get_target(e);
1394        target = t->u.kernel.target;
1395        xt_compat_target_from_user(t, dstptr, size);
1396
1397        de->next_offset = e->next_offset - (origsize - *size);
1398
1399        for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1400                if ((unsigned char *)de - base < newinfo->hook_entry[h])
1401                        newinfo->hook_entry[h] -= origsize - *size;
1402                if ((unsigned char *)de - base < newinfo->underflow[h])
1403                        newinfo->underflow[h] -= origsize - *size;
1404        }
1405}
1406
1407static int
1408translate_compat_table(struct net *net,
1409                       struct xt_table_info **pinfo,
1410                       void **pentry0,
1411                       const struct compat_ipt_replace *compatr)
1412{
1413        unsigned int i, j;
1414        struct xt_table_info *newinfo, *info;
1415        void *pos, *entry0, *entry1;
1416        struct compat_ipt_entry *iter0;
1417        struct ipt_replace repl;
1418        unsigned int size;
1419        int ret;
1420
1421        info = *pinfo;
1422        entry0 = *pentry0;
1423        size = compatr->size;
1424        info->number = compatr->num_entries;
1425
1426        j = 0;
1427        xt_compat_lock(AF_INET);
1428        xt_compat_init_offsets(AF_INET, compatr->num_entries);
1429        /* Walk through entries, checking offsets. */
1430        xt_entry_foreach(iter0, entry0, compatr->size) {
1431                ret = check_compat_entry_size_and_hooks(iter0, info, &size,
1432                                                        entry0,
1433                                                        entry0 + compatr->size);
1434                if (ret != 0)
1435                        goto out_unlock;
1436                ++j;
1437        }
1438
1439        ret = -EINVAL;
1440        if (j != compatr->num_entries)
1441                goto out_unlock;
1442
1443        ret = -ENOMEM;
1444        newinfo = xt_alloc_table_info(size);
1445        if (!newinfo)
1446                goto out_unlock;
1447
1448        newinfo->number = compatr->num_entries;
1449        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1450                newinfo->hook_entry[i] = compatr->hook_entry[i];
1451                newinfo->underflow[i] = compatr->underflow[i];
1452        }
1453        entry1 = newinfo->entries;
1454        pos = entry1;
1455        size = compatr->size;
1456        xt_entry_foreach(iter0, entry0, compatr->size)
1457                compat_copy_entry_from_user(iter0, &pos, &size,
1458                                            newinfo, entry1);
1459
1460        /* all module references in entry0 are now gone.
1461         * entry1/newinfo contains a 64bit ruleset that looks exactly as
1462         * generated by 64bit userspace.
1463         *
1464         * Call standard translate_table() to validate all hook_entrys,
1465         * underflows, check for loops, etc.
1466         */
1467        xt_compat_flush_offsets(AF_INET);
1468        xt_compat_unlock(AF_INET);
1469
1470        memcpy(&repl, compatr, sizeof(*compatr));
1471
1472        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1473                repl.hook_entry[i] = newinfo->hook_entry[i];
1474                repl.underflow[i] = newinfo->underflow[i];
1475        }
1476
1477        repl.num_counters = 0;
1478        repl.counters = NULL;
1479        repl.size = newinfo->size;
1480        ret = translate_table(net, newinfo, entry1, &repl);
1481        if (ret)
1482                goto free_newinfo;
1483
1484        *pinfo = newinfo;
1485        *pentry0 = entry1;
1486        xt_free_table_info(info);
1487        return 0;
1488
1489free_newinfo:
1490        xt_free_table_info(newinfo);
1491        return ret;
1492out_unlock:
1493        xt_compat_flush_offsets(AF_INET);
1494        xt_compat_unlock(AF_INET);
1495        xt_entry_foreach(iter0, entry0, compatr->size) {
1496                if (j-- == 0)
1497                        break;
1498                compat_release_entry(iter0);
1499        }
1500        return ret;
1501}
1502
1503static int
1504compat_do_replace(struct net *net, void __user *user, unsigned int len)
1505{
1506        int ret;
1507        struct compat_ipt_replace tmp;
1508        struct xt_table_info *newinfo;
1509        void *loc_cpu_entry;
1510        struct ipt_entry *iter;
1511
1512        if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1513                return -EFAULT;
1514
1515        /* overflow check */
1516        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1517                return -ENOMEM;
1518        if (tmp.num_counters == 0)
1519                return -EINVAL;
1520
1521        tmp.name[sizeof(tmp.name)-1] = 0;
1522
1523        newinfo = xt_alloc_table_info(tmp.size);
1524        if (!newinfo)
1525                return -ENOMEM;
1526
1527        loc_cpu_entry = newinfo->entries;
1528        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1529                           tmp.size) != 0) {
1530                ret = -EFAULT;
1531                goto free_newinfo;
1532        }
1533
1534        ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
1535        if (ret != 0)
1536                goto free_newinfo;
1537
1538        ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1539                           tmp.num_counters, compat_ptr(tmp.counters));
1540        if (ret)
1541                goto free_newinfo_untrans;
1542        return 0;
1543
1544 free_newinfo_untrans:
1545        xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1546                cleanup_entry(iter, net);
1547 free_newinfo:
1548        xt_free_table_info(newinfo);
1549        return ret;
1550}
1551
1552static int
1553compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1554                      unsigned int len)
1555{
1556        int ret;
1557
1558        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1559                return -EPERM;
1560
1561        switch (cmd) {
1562        case IPT_SO_SET_REPLACE:
1563                ret = compat_do_replace(sock_net(sk), user, len);
1564                break;
1565
1566        case IPT_SO_SET_ADD_COUNTERS:
1567                ret = do_add_counters(sock_net(sk), user, len, 1);
1568                break;
1569
1570        default:
1571                ret = -EINVAL;
1572        }
1573
1574        return ret;
1575}
1576
/*
 * Header of a "get entries" request as read from userspace by
 * compat_get_entries(); the rules are written back to @entrytable.
 */
struct compat_ipt_get_entries {
        /* Table to dump; NUL-terminated by the kernel after the copy. */
        char name[XT_TABLE_MAXNAMELEN];
        /* Size userspace expects; must equal the table's compat size. */
        compat_uint_t size;
        /* Destination for the dumped entries. */
        struct compat_ipt_entry entrytable[0];
};
1582
1583static int
1584compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1585                            void __user *userptr)
1586{
1587        struct xt_counters *counters;
1588        const struct xt_table_info *private = table->private;
1589        void __user *pos;
1590        unsigned int size;
1591        int ret = 0;
1592        unsigned int i = 0;
1593        struct ipt_entry *iter;
1594
1595        counters = alloc_counters(table);
1596        if (IS_ERR(counters))
1597                return PTR_ERR(counters);
1598
1599        pos = userptr;
1600        size = total_size;
1601        xt_entry_foreach(iter, private->entries, total_size) {
1602                ret = compat_copy_entry_to_user(iter, &pos,
1603                                                &size, counters, i++);
1604                if (ret != 0)
1605                        break;
1606        }
1607
1608        vfree(counters);
1609        return ret;
1610}
1611
1612static int
1613compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1614                   int *len)
1615{
1616        int ret;
1617        struct compat_ipt_get_entries get;
1618        struct xt_table *t;
1619
1620        if (*len < sizeof(get))
1621                return -EINVAL;
1622
1623        if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1624                return -EFAULT;
1625
1626        if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
1627                return -EINVAL;
1628
1629        get.name[sizeof(get.name) - 1] = '\0';
1630
1631        xt_compat_lock(AF_INET);
1632        t = xt_find_table_lock(net, AF_INET, get.name);
1633        if (!IS_ERR_OR_NULL(t)) {
1634                const struct xt_table_info *private = t->private;
1635                struct xt_table_info info;
1636                ret = compat_table_info(private, &info);
1637                if (!ret && get.size == info.size)
1638                        ret = compat_copy_entries_to_user(private->size,
1639                                                          t, uptr->entrytable);
1640                else if (!ret)
1641                        ret = -EAGAIN;
1642
1643                xt_compat_flush_offsets(AF_INET);
1644                module_put(t->me);
1645                xt_table_unlock(t);
1646        } else
1647                ret = t ? PTR_ERR(t) : -ENOENT;
1648
1649        xt_compat_unlock(AF_INET);
1650        return ret;
1651}
1652
/* Defined further down; declared here because compat_do_ipt_get_ctl() calls it. */
static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1654
1655static int
1656compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1657{
1658        int ret;
1659
1660        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1661                return -EPERM;
1662
1663        switch (cmd) {
1664        case IPT_SO_GET_INFO:
1665                ret = get_info(sock_net(sk), user, len, 1);
1666                break;
1667        case IPT_SO_GET_ENTRIES:
1668                ret = compat_get_entries(sock_net(sk), user, len);
1669                break;
1670        default:
1671                ret = do_ipt_get_ctl(sk, cmd, user, len);
1672        }
1673        return ret;
1674}
1675#endif
1676
1677static int
1678do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1679{
1680        int ret;
1681
1682        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1683                return -EPERM;
1684
1685        switch (cmd) {
1686        case IPT_SO_SET_REPLACE:
1687                ret = do_replace(sock_net(sk), user, len);
1688                break;
1689
1690        case IPT_SO_SET_ADD_COUNTERS:
1691                ret = do_add_counters(sock_net(sk), user, len, 0);
1692                break;
1693
1694        default:
1695                ret = -EINVAL;
1696        }
1697
1698        return ret;
1699}
1700
1701static int
1702do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1703{
1704        int ret;
1705
1706        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1707                return -EPERM;
1708
1709        switch (cmd) {
1710        case IPT_SO_GET_INFO:
1711                ret = get_info(sock_net(sk), user, len, 0);
1712                break;
1713
1714        case IPT_SO_GET_ENTRIES:
1715                ret = get_entries(sock_net(sk), user, len);
1716                break;
1717
1718        case IPT_SO_GET_REVISION_MATCH:
1719        case IPT_SO_GET_REVISION_TARGET: {
1720                struct xt_get_revision rev;
1721                int target;
1722
1723                if (*len != sizeof(rev)) {
1724                        ret = -EINVAL;
1725                        break;
1726                }
1727                if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1728                        ret = -EFAULT;
1729                        break;
1730                }
1731                rev.name[sizeof(rev.name)-1] = 0;
1732
1733                if (cmd == IPT_SO_GET_REVISION_TARGET)
1734                        target = 1;
1735                else
1736                        target = 0;
1737
1738                try_then_request_module(xt_find_revision(AF_INET, rev.name,
1739                                                         rev.revision,
1740                                                         target, &ret),
1741                                        "ipt_%s", rev.name);
1742                break;
1743        }
1744
1745        default:
1746                ret = -EINVAL;
1747        }
1748
1749        return ret;
1750}
1751
1752static void __ipt_unregister_table(struct net *net, struct xt_table *table)
1753{
1754        struct xt_table_info *private;
1755        void *loc_cpu_entry;
1756        struct module *table_owner = table->me;
1757        struct ipt_entry *iter;
1758
1759        private = xt_unregister_table(table);
1760
1761        /* Decrease module usage counts and free resources */
1762        loc_cpu_entry = private->entries;
1763        xt_entry_foreach(iter, loc_cpu_entry, private->size)
1764                cleanup_entry(iter, net);
1765        if (private->number > private->initial_entries)
1766                module_put(table_owner);
1767        xt_free_table_info(private);
1768}
1769
1770int ipt_register_table(struct net *net, const struct xt_table *table,
1771                       const struct ipt_replace *repl,
1772                       const struct nf_hook_ops *ops, struct xt_table **res)
1773{
1774        int ret;
1775        struct xt_table_info *newinfo;
1776        struct xt_table_info bootstrap = {0};
1777        void *loc_cpu_entry;
1778        struct xt_table *new_table;
1779
1780        newinfo = xt_alloc_table_info(repl->size);
1781        if (!newinfo)
1782                return -ENOMEM;
1783
1784        loc_cpu_entry = newinfo->entries;
1785        memcpy(loc_cpu_entry, repl->entries, repl->size);
1786
1787        ret = translate_table(net, newinfo, loc_cpu_entry, repl);
1788        if (ret != 0)
1789                goto out_free;
1790
1791        new_table = xt_register_table(net, table, &bootstrap, newinfo);
1792        if (IS_ERR(new_table)) {
1793                ret = PTR_ERR(new_table);
1794                goto out_free;
1795        }
1796
1797        /* set res now, will see skbs right after nf_register_net_hooks */
1798        WRITE_ONCE(*res, new_table);
1799
1800        ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
1801        if (ret != 0) {
1802                __ipt_unregister_table(net, new_table);
1803                *res = NULL;
1804        }
1805
1806        return ret;
1807
1808out_free:
1809        xt_free_table_info(newinfo);
1810        return ret;
1811}
1812
1813void ipt_unregister_table(struct net *net, struct xt_table *table,
1814                          const struct nf_hook_ops *ops)
1815{
1816        nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
1817        __ipt_unregister_table(net, table);
1818}
1819
/*
 * Decide whether an ICMP (type, code) pair satisfies a rule.
 *
 * A rule type of 0xFF matches every packet.  Otherwise the packet type must
 * equal @test_type and @code must lie in [@min_code, @max_code], both ends
 * included.  The outcome is flipped when @invert is set.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     bool invert)
{
	bool hit;

	if (test_type == 0xFF)
		hit = true;
	else
		hit = type == test_type &&
		      code >= min_code && code <= max_code;

	return hit != invert;
}
1830
1831static bool
1832icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
1833{
1834        const struct icmphdr *ic;
1835        struct icmphdr _icmph;
1836        const struct ipt_icmp *icmpinfo = par->matchinfo;
1837
1838        /* Must not be a fragment. */
1839        if (par->fragoff != 0)
1840                return false;
1841
1842        ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
1843        if (ic == NULL) {
1844                /* We've been asked to examine this packet, and we
1845                 * can't.  Hence, no choice but to drop.
1846                 */
1847                par->hotdrop = true;
1848                return false;
1849        }
1850
1851        return icmp_type_code_match(icmpinfo->type,
1852                                    icmpinfo->code[0],
1853                                    icmpinfo->code[1],
1854                                    ic->type, ic->code,
1855                                    !!(icmpinfo->invflags&IPT_ICMP_INV));
1856}
1857
1858static int icmp_checkentry(const struct xt_mtchk_param *par)
1859{
1860        const struct ipt_icmp *icmpinfo = par->matchinfo;
1861
1862        /* Must specify no unknown invflags */
1863        return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
1864}
1865
1866static struct xt_target ipt_builtin_tg[] __read_mostly = {
1867        {
1868                .name             = XT_STANDARD_TARGET,
1869                .targetsize       = sizeof(int),
1870                .family           = NFPROTO_IPV4,
1871#ifdef CONFIG_COMPAT
1872                .compatsize       = sizeof(compat_int_t),
1873                .compat_from_user = compat_standard_from_user,
1874                .compat_to_user   = compat_standard_to_user,
1875#endif
1876        },
1877        {
1878                .name             = XT_ERROR_TARGET,
1879                .target           = ipt_error,
1880                .targetsize       = XT_FUNCTION_MAXNAMELEN,
1881                .family           = NFPROTO_IPV4,
1882        },
1883};
1884
1885static struct nf_sockopt_ops ipt_sockopts = {
1886        .pf             = PF_INET,
1887        .set_optmin     = IPT_BASE_CTL,
1888        .set_optmax     = IPT_SO_SET_MAX+1,
1889        .set            = do_ipt_set_ctl,
1890#ifdef CONFIG_COMPAT
1891        .compat_set     = compat_do_ipt_set_ctl,
1892#endif
1893        .get_optmin     = IPT_BASE_CTL,
1894        .get_optmax     = IPT_SO_GET_MAX+1,
1895        .get            = do_ipt_get_ctl,
1896#ifdef CONFIG_COMPAT
1897        .compat_get     = compat_do_ipt_get_ctl,
1898#endif
1899        .owner          = THIS_MODULE,
1900};
1901
1902static struct xt_match ipt_builtin_mt[] __read_mostly = {
1903        {
1904                .name       = "icmp",
1905                .match      = icmp_match,
1906                .matchsize  = sizeof(struct ipt_icmp),
1907                .checkentry = icmp_checkentry,
1908                .proto      = IPPROTO_ICMP,
1909                .family     = NFPROTO_IPV4,
1910        },
1911};
1912
1913static int __net_init ip_tables_net_init(struct net *net)
1914{
1915        return xt_proto_init(net, NFPROTO_IPV4);
1916}
1917
1918static void __net_exit ip_tables_net_exit(struct net *net)
1919{
1920        xt_proto_fini(net, NFPROTO_IPV4);
1921}
1922
1923static struct pernet_operations ip_tables_net_ops = {
1924        .init = ip_tables_net_init,
1925        .exit = ip_tables_net_exit,
1926};
1927
1928static int __init ip_tables_init(void)
1929{
1930        int ret;
1931
1932        ret = register_pernet_subsys(&ip_tables_net_ops);
1933        if (ret < 0)
1934                goto err1;
1935
1936        /* No one else will be downing sem now, so we won't sleep */
1937        ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
1938        if (ret < 0)
1939                goto err2;
1940        ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
1941        if (ret < 0)
1942                goto err4;
1943
1944        /* Register setsockopt */
1945        ret = nf_register_sockopt(&ipt_sockopts);
1946        if (ret < 0)
1947                goto err5;
1948
1949        pr_info("(C) 2000-2006 Netfilter Core Team\n");
1950        return 0;
1951
1952err5:
1953        xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
1954err4:
1955        xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
1956err2:
1957        unregister_pernet_subsys(&ip_tables_net_ops);
1958err1:
1959        return ret;
1960}
1961
1962static void __exit ip_tables_fini(void)
1963{
1964        nf_unregister_sockopt(&ipt_sockopts);
1965
1966        xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
1967        xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
1968        unregister_pernet_subsys(&ip_tables_net_ops);
1969}
1970
/* Module entry and exit points. */
module_init(ip_tables_init);
module_exit(ip_tables_fini);

/* Symbols exported to other modules. */
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
1976