/* linux/net/core/filter.c */
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Author:
 *     Jay Schulist <jschlst@samba.org>
 *
 * Based on the design of:
 *     - The Berkeley Packet Filter
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>

  40/* No hurry in this branch */
  41static void *__load_pointer(struct sk_buff *skb, int k)
  42{
  43        u8 *ptr = NULL;
  44
  45        if (k >= SKF_NET_OFF)
  46                ptr = skb_network_header(skb) + k - SKF_NET_OFF;
  47        else if (k >= SKF_LL_OFF)
  48                ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
  49
  50        if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
  51                return ptr;
  52        return NULL;
  53}
  54
  55static inline void *load_pointer(struct sk_buff *skb, int k,
  56                                 unsigned int size, void *buffer)
  57{
  58        if (k >= 0)
  59                return skb_header_pointer(skb, k, size, buffer);
  60        else {
  61                if (k >= SKF_AD_OFF)
  62                        return NULL;
  63                return __load_pointer(skb, k);
  64        }
  65}
  66
  67/**
  68 *      sk_filter - run a packet through a socket filter
  69 *      @sk: sock associated with &sk_buff
  70 *      @skb: buffer to filter
  71 *
  72 * Run the filter code and then cut skb->data to correct size returned by
  73 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
  74 * than pkt_len we keep whole skb->data. This is the socket level
  75 * wrapper to sk_run_filter. It returns 0 if the packet should
  76 * be accepted or -EPERM if the packet should be tossed.
  77 *
  78 */
  79int sk_filter(struct sock *sk, struct sk_buff *skb)
  80{
  81        int err;
  82        struct sk_filter *filter;
  83
  84        err = security_sock_rcv_skb(sk, skb);
  85        if (err)
  86                return err;
  87
  88        rcu_read_lock_bh();
  89        filter = rcu_dereference(sk->sk_filter);
  90        if (filter) {
  91                unsigned int pkt_len = sk_run_filter(skb, filter->insns,
  92                                filter->len);
  93                err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
  94        }
  95        rcu_read_unlock_bh();
  96
  97        return err;
  98}
  99EXPORT_SYMBOL(sk_filter);
 100
 101/**
 102 *      sk_run_filter - run a filter on a socket
 103 *      @skb: buffer to run the filter on
 104 *      @filter: filter to apply
 105 *      @flen: length of filter
 106 *
 107 * Decode and apply filter instructions to the skb->data.
 108 * Return length to keep, 0 for none. skb is the data we are
 109 * filtering, filter is the array of filter instructions, and
 110 * len is the number of filter blocks in the array.
 111 */
 112unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 113{
 114        struct sock_filter *fentry;     /* We walk down these */
 115        void *ptr;
 116        u32 A = 0;                      /* Accumulator */
 117        u32 X = 0;                      /* Index Register */
 118        u32 mem[BPF_MEMWORDS];          /* Scratch Memory Store */
 119        u32 tmp;
 120        int k;
 121        int pc;
 122
 123        /*
 124         * Process array of filter instructions.
 125         */
 126        for (pc = 0; pc < flen; pc++) {
 127                fentry = &filter[pc];
 128
 129                switch (fentry->code) {
 130                case BPF_ALU|BPF_ADD|BPF_X:
 131                        A += X;
 132                        continue;
 133                case BPF_ALU|BPF_ADD|BPF_K:
 134                        A += fentry->k;
 135                        continue;
 136                case BPF_ALU|BPF_SUB|BPF_X:
 137                        A -= X;
 138                        continue;
 139                case BPF_ALU|BPF_SUB|BPF_K:
 140                        A -= fentry->k;
 141                        continue;
 142                case BPF_ALU|BPF_MUL|BPF_X:
 143                        A *= X;
 144                        continue;
 145                case BPF_ALU|BPF_MUL|BPF_K:
 146                        A *= fentry->k;
 147                        continue;
 148                case BPF_ALU|BPF_DIV|BPF_X:
 149                        if (X == 0)
 150                                return 0;
 151                        A /= X;
 152                        continue;
 153                case BPF_ALU|BPF_DIV|BPF_K:
 154                        A /= fentry->k;
 155                        continue;
 156                case BPF_ALU|BPF_AND|BPF_X:
 157                        A &= X;
 158                        continue;
 159                case BPF_ALU|BPF_AND|BPF_K:
 160                        A &= fentry->k;
 161                        continue;
 162                case BPF_ALU|BPF_OR|BPF_X:
 163                        A |= X;
 164                        continue;
 165                case BPF_ALU|BPF_OR|BPF_K:
 166                        A |= fentry->k;
 167                        continue;
 168                case BPF_ALU|BPF_LSH|BPF_X:
 169                        A <<= X;
 170                        continue;
 171                case BPF_ALU|BPF_LSH|BPF_K:
 172                        A <<= fentry->k;
 173                        continue;
 174                case BPF_ALU|BPF_RSH|BPF_X:
 175                        A >>= X;
 176                        continue;
 177                case BPF_ALU|BPF_RSH|BPF_K:
 178                        A >>= fentry->k;
 179                        continue;
 180                case BPF_ALU|BPF_NEG:
 181                        A = -A;
 182                        continue;
 183                case BPF_JMP|BPF_JA:
 184                        pc += fentry->k;
 185                        continue;
 186                case BPF_JMP|BPF_JGT|BPF_K:
 187                        pc += (A > fentry->k) ? fentry->jt : fentry->jf;
 188                        continue;
 189                case BPF_JMP|BPF_JGE|BPF_K:
 190                        pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
 191                        continue;
 192                case BPF_JMP|BPF_JEQ|BPF_K:
 193                        pc += (A == fentry->k) ? fentry->jt : fentry->jf;
 194                        continue;
 195                case BPF_JMP|BPF_JSET|BPF_K:
 196                        pc += (A & fentry->k) ? fentry->jt : fentry->jf;
 197                        continue;
 198                case BPF_JMP|BPF_JGT|BPF_X:
 199                        pc += (A > X) ? fentry->jt : fentry->jf;
 200                        continue;
 201                case BPF_JMP|BPF_JGE|BPF_X:
 202                        pc += (A >= X) ? fentry->jt : fentry->jf;
 203                        continue;
 204                case BPF_JMP|BPF_JEQ|BPF_X:
 205                        pc += (A == X) ? fentry->jt : fentry->jf;
 206                        continue;
 207                case BPF_JMP|BPF_JSET|BPF_X:
 208                        pc += (A & X) ? fentry->jt : fentry->jf;
 209                        continue;
 210                case BPF_LD|BPF_W|BPF_ABS:
 211                        k = fentry->k;
 212load_w:
 213                        ptr = load_pointer(skb, k, 4, &tmp);
 214                        if (ptr != NULL) {
 215                                A = get_unaligned_be32(ptr);
 216                                continue;
 217                        }
 218                        break;
 219                case BPF_LD|BPF_H|BPF_ABS:
 220                        k = fentry->k;
 221load_h:
 222                        ptr = load_pointer(skb, k, 2, &tmp);
 223                        if (ptr != NULL) {
 224                                A = get_unaligned_be16(ptr);
 225                                continue;
 226                        }
 227                        break;
 228                case BPF_LD|BPF_B|BPF_ABS:
 229                        k = fentry->k;
 230load_b:
 231                        ptr = load_pointer(skb, k, 1, &tmp);
 232                        if (ptr != NULL) {
 233                                A = *(u8 *)ptr;
 234                                continue;
 235                        }
 236                        break;
 237                case BPF_LD|BPF_W|BPF_LEN:
 238                        A = skb->len;
 239                        continue;
 240                case BPF_LDX|BPF_W|BPF_LEN:
 241                        X = skb->len;
 242                        continue;
 243                case BPF_LD|BPF_W|BPF_IND:
 244                        k = X + fentry->k;
 245                        goto load_w;
 246                case BPF_LD|BPF_H|BPF_IND:
 247                        k = X + fentry->k;
 248                        goto load_h;
 249                case BPF_LD|BPF_B|BPF_IND:
 250                        k = X + fentry->k;
 251                        goto load_b;
 252                case BPF_LDX|BPF_B|BPF_MSH:
 253                        ptr = load_pointer(skb, fentry->k, 1, &tmp);
 254                        if (ptr != NULL) {
 255                                X = (*(u8 *)ptr & 0xf) << 2;
 256                                continue;
 257                        }
 258                        return 0;
 259                case BPF_LD|BPF_IMM:
 260                        A = fentry->k;
 261                        continue;
 262                case BPF_LDX|BPF_IMM:
 263                        X = fentry->k;
 264                        continue;
 265                case BPF_LD|BPF_MEM:
 266                        A = mem[fentry->k];
 267                        continue;
 268                case BPF_LDX|BPF_MEM:
 269                        X = mem[fentry->k];
 270                        continue;
 271                case BPF_MISC|BPF_TAX:
 272                        X = A;
 273                        continue;
 274                case BPF_MISC|BPF_TXA:
 275                        A = X;
 276                        continue;
 277                case BPF_RET|BPF_K:
 278                        return fentry->k;
 279                case BPF_RET|BPF_A:
 280                        return A;
 281                case BPF_ST:
 282                        mem[fentry->k] = A;
 283                        continue;
 284                case BPF_STX:
 285                        mem[fentry->k] = X;
 286                        continue;
 287                default:
 288                        WARN_ON(1);
 289                        return 0;
 290                }
 291
 292                /*
 293                 * Handle ancillary data, which are impossible
 294                 * (or very difficult) to get parsing packet contents.
 295                 */
 296                switch (k-SKF_AD_OFF) {
 297                case SKF_AD_PROTOCOL:
 298                        A = ntohs(skb->protocol);
 299                        continue;
 300                case SKF_AD_PKTTYPE:
 301                        A = skb->pkt_type;
 302                        continue;
 303                case SKF_AD_IFINDEX:
 304                        A = skb->dev->ifindex;
 305                        continue;
 306                case SKF_AD_NLATTR: {
 307                        struct nlattr *nla;
 308
 309                        if (skb_is_nonlinear(skb))
 310                                return 0;
 311                        if (A > skb->len - sizeof(struct nlattr))
 312                                return 0;
 313
 314                        nla = nla_find((struct nlattr *)&skb->data[A],
 315                                       skb->len - A, X);
 316                        if (nla)
 317                                A = (void *)nla - (void *)skb->data;
 318                        else
 319                                A = 0;
 320                        continue;
 321                }
 322                case SKF_AD_NLATTR_NEST: {
 323                        struct nlattr *nla;
 324
 325                        if (skb_is_nonlinear(skb))
 326                                return 0;
 327                        if (A > skb->len - sizeof(struct nlattr))
 328                                return 0;
 329
 330                        nla = (struct nlattr *)&skb->data[A];
 331                        if (nla->nla_len > A - skb->len)
 332                                return 0;
 333
 334                        nla = nla_find_nested(nla, X);
 335                        if (nla)
 336                                A = (void *)nla - (void *)skb->data;
 337                        else
 338                                A = 0;
 339                        continue;
 340                }
 341                default:
 342                        return 0;
 343                }
 344        }
 345
 346        return 0;
 347}
 348EXPORT_SYMBOL(sk_run_filter);
 349
/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int sk_chk_filter(struct sock_filter *filter, int flen)
{
	struct sock_filter *ftest;
	int pc;

	/* Reject empty programs and programs over the instruction cap. */
	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		ftest = &filter[pc];

		/* Only allow valid instructions */
		switch (ftest->code) {
		/* These forms need no operand validation: the operands
		 * are registers, skb-derived values, or free-form
		 * constants that sk_run_filter() handles safely.
		 */
		case BPF_ALU|BPF_ADD|BPF_K:
		case BPF_ALU|BPF_ADD|BPF_X:
		case BPF_ALU|BPF_SUB|BPF_K:
		case BPF_ALU|BPF_SUB|BPF_X:
		case BPF_ALU|BPF_MUL|BPF_K:
		case BPF_ALU|BPF_MUL|BPF_X:
		case BPF_ALU|BPF_DIV|BPF_X:
		case BPF_ALU|BPF_AND|BPF_K:
		case BPF_ALU|BPF_AND|BPF_X:
		case BPF_ALU|BPF_OR|BPF_K:
		case BPF_ALU|BPF_OR|BPF_X:
		case BPF_ALU|BPF_LSH|BPF_K:
		case BPF_ALU|BPF_LSH|BPF_X:
		case BPF_ALU|BPF_RSH|BPF_K:
		case BPF_ALU|BPF_RSH|BPF_X:
		case BPF_ALU|BPF_NEG:
		case BPF_LD|BPF_W|BPF_ABS:
		case BPF_LD|BPF_H|BPF_ABS:
		case BPF_LD|BPF_B|BPF_ABS:
		case BPF_LD|BPF_W|BPF_LEN:
		case BPF_LD|BPF_W|BPF_IND:
		case BPF_LD|BPF_H|BPF_IND:
		case BPF_LD|BPF_B|BPF_IND:
		case BPF_LD|BPF_IMM:
		case BPF_LDX|BPF_W|BPF_LEN:
		case BPF_LDX|BPF_B|BPF_MSH:
		case BPF_LDX|BPF_IMM:
		case BPF_MISC|BPF_TAX:
		case BPF_MISC|BPF_TXA:
		case BPF_RET|BPF_K:
		case BPF_RET|BPF_A:
			break;

		/* Some instructions need special checks */

		case BPF_ALU|BPF_DIV|BPF_K:
			/* check for division by zero: the divisor is a
			 * constant, so it can be rejected at load time
			 * (BPF_DIV|BPF_X is checked at run time instead).
			 */
			if (ftest->k == 0)
				return -EINVAL;
			break;

		case BPF_LD|BPF_MEM:
		case BPF_LDX|BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* check for invalid memory addresses: k indexes
			 * the BPF_MEMWORDS-sized scratch array in
			 * sk_run_filter().
			 */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;

		case BPF_JMP|BPF_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned)(flen-pc-1))
				return -EINVAL;
			break;

		case BPF_JMP|BPF_JEQ|BPF_K:
		case BPF_JMP|BPF_JEQ|BPF_X:
		case BPF_JMP|BPF_JGE|BPF_K:
		case BPF_JMP|BPF_JGE|BPF_X:
		case BPF_JMP|BPF_JGT|BPF_K:
		case BPF_JMP|BPF_JGT|BPF_X:
		case BPF_JMP|BPF_JSET|BPF_K:
		case BPF_JMP|BPF_JSET|BPF_X:
			/* for conditionals both must be safe: both the
			 * taken and not-taken targets (pc + off + 1)
			 * must stay inside the program.
			 */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;

		default:
			return -EINVAL;
		}
	}

	/* Jumps are all forward, so ending on a RET guarantees every
	 * execution path terminates with a return.
	 */
	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);

 461/**
 462 *      sk_filter_rcu_release: Release a socket filter by rcu_head
 463 *      @rcu: rcu_head that contains the sk_filter to free
 464 */
 465static void sk_filter_rcu_release(struct rcu_head *rcu)
 466{
 467        struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 468
 469        sk_filter_release(fp);
 470}
 471
 472static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
 473{
 474        unsigned int size = sk_filter_len(fp);
 475
 476        atomic_sub(size, &sk->sk_omem_alloc);
 477        call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
 478}
 479
 480/**
 481 *      sk_attach_filter - attach a socket filter
 482 *      @fprog: the filter program
 483 *      @sk: the socket to use
 484 *
 485 * Attach the user's filter code. We first run some sanity checks on
 486 * it to make sure it does not explode on us later. If an error
 487 * occurs or there is insufficient memory for the filter a negative
 488 * errno code is returned. On success the return is zero.
 489 */
 490int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 491{
 492        struct sk_filter *fp, *old_fp;
 493        unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
 494        int err;
 495
 496        /* Make sure new filter is there and in the right amounts. */
 497        if (fprog->filter == NULL)
 498                return -EINVAL;
 499
 500        fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
 501        if (!fp)
 502                return -ENOMEM;
 503        if (copy_from_user(fp->insns, fprog->filter, fsize)) {
 504                sock_kfree_s(sk, fp, fsize+sizeof(*fp));
 505                return -EFAULT;
 506        }
 507
 508        atomic_set(&fp->refcnt, 1);
 509        fp->len = fprog->len;
 510
 511        err = sk_chk_filter(fp->insns, fp->len);
 512        if (err) {
 513                sk_filter_uncharge(sk, fp);
 514                return err;
 515        }
 516
 517        rcu_read_lock_bh();
 518        old_fp = rcu_dereference(sk->sk_filter);
 519        rcu_assign_pointer(sk->sk_filter, fp);
 520        rcu_read_unlock_bh();
 521
 522        if (old_fp)
 523                sk_filter_delayed_uncharge(sk, old_fp);
 524        return 0;
 525}
 526
 527int sk_detach_filter(struct sock *sk)
 528{
 529        int ret = -ENOENT;
 530        struct sk_filter *filter;
 531
 532        rcu_read_lock_bh();
 533        filter = rcu_dereference(sk->sk_filter);
 534        if (filter) {
 535                rcu_assign_pointer(sk->sk_filter, NULL);
 536                sk_filter_delayed_uncharge(sk, filter);
 537                ret = 0;
 538        }
 539        rcu_read_unlock_bh();
 540        return ret;
 541}
 542