/* linux/net/dccp/proto.c */
   1/*
   2 *  net/dccp/proto.c
   3 *
   4 *  An implementation of the DCCP protocol
   5 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   6 *
   7 *      This program is free software; you can redistribute it and/or modify it
   8 *      under the terms of the GNU General Public License version 2 as
   9 *      published by the Free Software Foundation.
  10 */
  11
  12#include <linux/dccp.h>
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/sched.h>
  16#include <linux/kernel.h>
  17#include <linux/skbuff.h>
  18#include <linux/netdevice.h>
  19#include <linux/in.h>
  20#include <linux/if_arp.h>
  21#include <linux/init.h>
  22#include <linux/random.h>
  23#include <net/checksum.h>
  24
  25#include <net/inet_sock.h>
  26#include <net/sock.h>
  27#include <net/xfrm.h>
  28
  29#include <asm/ioctls.h>
  30#include <asm/semaphore.h>
  31#include <linux/spinlock.h>
  32#include <linux/timer.h>
  33#include <linux/delay.h>
  34#include <linux/poll.h>
  35
  36#include "ccid.h"
  37#include "dccp.h"
  38#include "feat.h"
  39
/* Per-CPU DCCP MIB counters (SNMP statistics). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Sockets close()d by their owner but not yet destroyed (see dccp_close()). */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Shared listen/bind/established hash tables for all DCCP sockets. */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
  58
/*
 * dccp_set_state  -  transition the socket to a new DCCP state
 * @sk:    socket being moved
 * @state: target state (DCCP_OPEN, DCCP_CLOSED, ...)
 *
 * Keeps the CURRESTAB/ESTABRESETS MIB counters in step with the
 * transition and, when entering DCCP_CLOSED, unhashes the socket and
 * releases its local port binding before the new state becomes visible.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
		      dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);	/* a no-op transition indicates a bug */

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* release the port unless bind() locked it explicitly */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}
  93
  94EXPORT_SYMBOL_GPL(dccp_set_state);
  95
/*
 * dccp_done  -  finish the lifetime of a DCCP socket
 *
 * Moves the socket to DCCP_CLOSED, stops the transmit timers and either
 * wakes up a still-attached owner (so it can observe the shutdown) or,
 * for an already orphaned (SOCK_DEAD) socket, destroys it.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;	/* no further rx or tx */

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);	/* wake up the owner */
	else
		inet_csk_destroy_sock(sk);
}
 108
 109EXPORT_SYMBOL_GPL(dccp_done);
 110
 111const char *dccp_packet_name(const int type)
 112{
 113        static const char *dccp_packet_names[] = {
 114                [DCCP_PKT_REQUEST]  = "REQUEST",
 115                [DCCP_PKT_RESPONSE] = "RESPONSE",
 116                [DCCP_PKT_DATA]     = "DATA",
 117                [DCCP_PKT_ACK]      = "ACK",
 118                [DCCP_PKT_DATAACK]  = "DATAACK",
 119                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
 120                [DCCP_PKT_CLOSE]    = "CLOSE",
 121                [DCCP_PKT_RESET]    = "RESET",
 122                [DCCP_PKT_SYNC]     = "SYNC",
 123                [DCCP_PKT_SYNCACK]  = "SYNCACK",
 124        };
 125
 126        if (type >= DCCP_NR_PKT_TYPES)
 127                return "INVALID";
 128        else
 129                return dccp_packet_names[type];
 130}
 131
 132EXPORT_SYMBOL_GPL(dccp_packet_name);
 133
 134const char *dccp_state_name(const int state)
 135{
 136        static char *dccp_state_names[] = {
 137        [DCCP_OPEN]       = "OPEN",
 138        [DCCP_REQUESTING] = "REQUESTING",
 139        [DCCP_PARTOPEN]   = "PARTOPEN",
 140        [DCCP_LISTEN]     = "LISTEN",
 141        [DCCP_RESPOND]    = "RESPOND",
 142        [DCCP_CLOSING]    = "CLOSING",
 143        [DCCP_TIME_WAIT]  = "TIME_WAIT",
 144        [DCCP_CLOSED]     = "CLOSED",
 145        };
 146
 147        if (state >= DCCP_MAX_STATES)
 148                return "INVALID STATE!";
 149        else
 150                return dccp_state_names[state];
 151}
 152
 153EXPORT_SYMBOL_GPL(dccp_state_name);
 154
/* Insert the socket into the shared DCCP socket hash tables. */
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}
 159
 160EXPORT_SYMBOL_GPL(dccp_hash);
 161
/* Remove the socket from the shared DCCP socket hash tables. */
void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}
 166
 167EXPORT_SYMBOL_GPL(dccp_unhash);
 168
/*
 * dccp_init_sock  -  initialise the DCCP-specific parts of a socket
 * @sk:                   socket to set up
 * @ctl_sock_initialized: zero only while the per-protocol control socket
 *                        itself is being created (see the else branch)
 *
 * Sets up feature negotiation, allocates the rx/tx CCID control blocks
 * and (when negotiated) the ack vector, then fills in the connection
 * defaults.  Returns 0 on success, a negative errno (-ENOMEM or the
 * dccp_feat_init() error) on failure; on failure all partially made
 * allocations are rolled back.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* roll back whichever of the two CCIDs was created */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative default until synced */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	return 0;
}
 230
 231EXPORT_SYMBOL_GPL(dccp_init_sock);
 232
/*
 * dccp_destroy_sock  -  release all DCCP-specific resources of a socket
 *
 * Counterpart of dccp_init_sock(): frees the pending retransmit skb, the
 * port binding, the service list, the ack vector and both CCID blocks,
 * and cleans the feature-negotiation lists.  Always returns 0.
 */
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	/* the delete helpers tolerate NULL (cf. the error path in
	 * dccp_init_sock), so this is safe on half-set-up sockets too */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}
 267
 268EXPORT_SYMBOL_GPL(dccp_destroy_sock);
 269
/* Mark the socket as a listener and hand over to the generic
 * inet_connection_sock accept-queue setup. */
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}
 277
/*
 * dccp_disconnect  -  abort the connection, returning the socket to CLOSED
 * @sk:    socket to reset
 * @flags: currently unused
 *
 * Modelled on the ABORT function of RFC 793: stops a listening socket,
 * flags ECONNRESET on a connection still in REQUESTING, purges queued
 * data in both directions and resets address/routing state so the socket
 * can be reused.  Returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;	/* no longer bound to a remote port */

	/* keep the source address only if bind() pinned it down */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);	/* drop the cached route */

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);	/* wake anyone waiting for errors */
	return err;
}
 319
 320EXPORT_SYMBOL_GPL(dccp_disconnect);
 321
 322/*
 323 *      Wait for a DCCP event.
 324 *
 325 *      Note that we don't need to lock the socket, as the upper poll layers
 326 *      take care of normal races (between the test and the event) and we don't
 327 *      go look at any of the socket buffers directly.
 328 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	/* both directions shut down, or the connection is gone entirely */
	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			/* writable only while enough send buffer is free */
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
 377
 378EXPORT_SYMBOL_GPL(dccp_poll);
 379
 380int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 381{
 382        int rc = -ENOTCONN;
 383
 384        lock_sock(sk);
 385
 386        if (sk->sk_state == DCCP_LISTEN)
 387                goto out;
 388
 389        switch (cmd) {
 390        case SIOCINQ: {
 391                struct sk_buff *skb;
 392                unsigned long amount = 0;
 393
 394                skb = skb_peek(&sk->sk_receive_queue);
 395                if (skb != NULL) {
 396                        /*
 397                         * We will only return the amount of this packet since
 398                         * that is all that will be read.
 399                         */
 400                        amount = skb->len;
 401                }
 402                rc = put_user(amount, (int __user *)arg);
 403        }
 404                break;
 405        default:
 406                rc = -ENOIOCTLCMD;
 407                break;
 408        }
 409out:
 410        release_sock(sk);
 411        return rc;
 412}
 413
 414EXPORT_SYMBOL_GPL(dccp_ioctl);
 415
/*
 * dccp_setsockopt_service  -  install the socket's service code(s)
 * @service: primary service code (the first __be32 of optval, already
 *           copied in by the caller)
 * @optval:  user buffer holding @service, optionally followed by further
 *           __be32 service codes
 * @optlen:  total length of the user buffer in bytes
 *
 * Rejects the reserved invalid service code and lists with more than
 * DCCP_SERVICE_LIST_MAX_LEN entries; any previously installed list is
 * replaced.  Returns 0, -EINVAL, -ENOMEM or -EFAULT.
 * NOTE(review): an @optlen that is not a multiple of sizeof(u32) is not
 * rejected here and the trailing partial word is copied as-is — confirm
 * this is the intended ABI.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* entries beyond the primary service code */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* drop any list installed by an earlier call */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
 450
 451/* byte 1 is feature.  the rest is the preference list */
 452static int dccp_setsockopt_change(struct sock *sk, int type,
 453                                  struct dccp_so_feat __user *optval)
 454{
 455        struct dccp_so_feat opt;
 456        u8 *val;
 457        int rc;
 458
 459        if (copy_from_user(&opt, optval, sizeof(opt)))
 460                return -EFAULT;
 461
 462        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
 463        if (!val)
 464                return -ENOMEM;
 465
 466        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
 467                rc = -EFAULT;
 468                goto out_free_val;
 469        }
 470
 471        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
 472                              val, opt.dccpsf_len, GFP_KERNEL);
 473        if (rc)
 474                goto out_free_val;
 475
 476out:
 477        return rc;
 478
 479out_free_val:
 480        kfree(val);
 481        goto out;
 482}
 483
/*
 * do_dccp_setsockopt  -  handle SOL_DCCP-level socket options
 *
 * Every option requires at least an int of option data.
 * DCCP_SOCKOPT_SERVICE is dispatched before taking the socket lock, as
 * dccp_setsockopt_service() does its own locking and user-space copies.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		/* checksum coverage is a 4-bit field, hence 0..15 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
 544
 545int dccp_setsockopt(struct sock *sk, int level, int optname,
 546                    char __user *optval, int optlen)
 547{
 548        if (level != SOL_DCCP)
 549                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
 550                                                             optname, optval,
 551                                                             optlen);
 552        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 553}
 554
 555EXPORT_SYMBOL_GPL(dccp_setsockopt);
 556
 557#ifdef CONFIG_COMPAT
 558int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
 559                           char __user *optval, int optlen)
 560{
 561        if (level != SOL_DCCP)
 562                return inet_csk_compat_setsockopt(sk, level, optname,
 563                                                  optval, optlen);
 564        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 565}
 566
 567EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
 568#endif
 569
 570static int dccp_getsockopt_service(struct sock *sk, int len,
 571                                   __be32 __user *optval,
 572                                   int __user *optlen)
 573{
 574        const struct dccp_sock *dp = dccp_sk(sk);
 575        const struct dccp_service_list *sl;
 576        int err = -ENOENT, slen = 0, total_len = sizeof(u32);
 577
 578        lock_sock(sk);
 579        if ((sl = dp->dccps_service_list) != NULL) {
 580                slen = sl->dccpsl_nr * sizeof(u32);
 581                total_len += slen;
 582        }
 583
 584        err = -EINVAL;
 585        if (total_len > len)
 586                goto out;
 587
 588        err = 0;
 589        if (put_user(total_len, optlen) ||
 590            put_user(dp->dccps_service, optval) ||
 591            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
 592                err = -EFAULT;
 593out:
 594        release_sock(sk);
 595        return err;
 596}
 597
/*
 * do_dccp_getsockopt  -  handle SOL_DCCP-level socket option queries
 *
 * Option numbers 128..191 are forwarded to the rx CCID and 192..255 to
 * the tx CCID.
 * NOTE(review): DCCP_SOCKOPT_PACKET_SIZE returns success without writing
 * anything to optval/optlen — confirm this matches the deprecation plan.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;	/* current maximum packet size */
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		len = sizeof(val);
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
 646
 647int dccp_getsockopt(struct sock *sk, int level, int optname,
 648                    char __user *optval, int __user *optlen)
 649{
 650        if (level != SOL_DCCP)
 651                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
 652                                                             optname, optval,
 653                                                             optlen);
 654        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 655}
 656
 657EXPORT_SYMBOL_GPL(dccp_getsockopt);
 658
 659#ifdef CONFIG_COMPAT
 660int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 661                           char __user *optval, int __user *optlen)
 662{
 663        if (level != SOL_DCCP)
 664                return inet_csk_compat_getsockopt(sk, level, optname,
 665                                                  optval, optlen);
 666        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 667}
 668
 669EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 670#endif
 671
/*
 * dccp_sendmsg  -  queue a single DCCP datagram for transmission
 *
 * One sendmsg() call maps to at most one packet, so a message larger
 * than the cached maximum packet size is rejected with -EMSGSIZE.
 * Returns the number of bytes queued or a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* bound the tx queue length when the sysctl asks for it */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process (which
	 * relies on sk_write_pending) works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* drop the socket lock around the (possibly sleeping) allocation */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
 725
 726EXPORT_SYMBOL_GPL(dccp_sendmsg);
 727
/*
 * dccp_recvmsg  -  receive one DCCP datagram
 *
 * Reads at most one packet per call; if the user buffer is smaller than
 * the packet, MSG_TRUNC is set and the remainder of the packet is
 * dropped with it.  RESET and CLOSE packets end the read with a return
 * of 0 (EOF-like); other non-data packet types are skipped — note they
 * are consumed even when MSG_PEEK is set.  Returns the number of bytes
 * copied or a negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb, 0);	/* discard other packet types */
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* sleep until data arrives or the timeout elapses */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
 824
 825EXPORT_SYMBOL_GPL(dccp_recvmsg);
 826
/*
 * inet_dccp_listen  -  listen() entry point for DCCP sockets
 *
 * Validates the socket type and state, moves a CLOSED socket into
 * LISTEN; on an already listening socket only the backlog is adjusted.
 * Returns 0 or -EINVAL / the dccp_listen_start() error.
 */
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
 862
 863EXPORT_SYMBOL_GPL(inet_dccp_listen);
 864
/*
 * Transition table used by dccp_close_state(): maps the current socket
 * state to the state entered on close().  The DCCP_ACTION_FIN bit is
 * OR-ed in where an active close (dccp_send_close()) must be initiated.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
 877
 878static int dccp_close_state(struct sock *sk)
 879{
 880        const int next = dccp_new_state[sk->sk_state];
 881        const int ns = next & DCCP_STATE_MASK;
 882
 883        if (ns != sk->sk_state)
 884                dccp_set_state(sk, ns);
 885
 886        return next & DCCP_ACTION_FIN;
 887}
 888
/*
 * dccp_close - close a DCCP socket from process context
 * @sk:      socket being closed
 * @timeout: linger time (jiffies) handed to sk_stream_wait_close()
 *
 * Flushes the receive queue, initiates the close handshake (or an
 * immediate disconnect on zero-linger), then orphans the socket and
 * either destroys it right away or leaves tear-down to the CLOSING
 * retransmit timer / incoming protocol events.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: a listener has no connection to shut
		 * down, only pending child sockets to dispose of. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		/* dccp_close_state() flagged DCCP_ACTION_FIN: actively
		 * initiate the close handshake. */
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before the final release_sock(), which may
	 * process backlogged packets and move the socket to CLOSED. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	/* Drop the reference taken by sock_hold() above; the socket is
	 * freed here only once all other references are gone. */
	sock_put(sk);
}
 979
 980EXPORT_SYMBOL_GPL(dccp_close);
 981
/*
 * dccp_shutdown - shutdown() handler for DCCP sockets
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR direction mask (currently ignored)
 *
 * NOTE(review): this is a stub - it only emits a debug message and
 * performs no shutdown processing, so shutdown() on a DCCP socket is
 * effectively a no-op.  TODO: implement at least receive-side shutdown.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
 986
 987EXPORT_SYMBOL_GPL(dccp_shutdown);
 988
 989static int __init dccp_mib_init(void)
 990{
 991        int rc = -ENOMEM;
 992
 993        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
 994        if (dccp_statistics[0] == NULL)
 995                goto out;
 996
 997        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
 998        if (dccp_statistics[1] == NULL)
 999                goto out_free_one;
1000
1001        rc = 0;
1002out:
1003        return rc;
1004out_free_one:
1005        free_percpu(dccp_statistics[0]);
1006        dccp_statistics[0] = NULL;
1007        goto out;
1008
1009}
1010
1011static void dccp_mib_exit(void)
1012{
1013        free_percpu(dccp_statistics[0]);
1014        free_percpu(dccp_statistics[1]);
1015        dccp_statistics[0] = dccp_statistics[1] = NULL;
1016}
1017
/* Requested size of the established-connection hash table; 0 (default)
 * lets dccp_init() size it from available memory. Read-only via sysfs. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug-message switch, settable at module load, read-only afterwards.
 * NOTE(review): declared as int but registered with the "bool" param
 * type; older kernels tolerated an int backing variable for bool
 * params, newer ones require a real bool - confirm against the target
 * kernel version before changing the type (it is exported below).
 */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1029
/*
 * dccp_init - module init: set up the bind-bucket slab, the established
 * and bind hash tables, per-cpu MIB counters, ack-vector and sysctl
 * machinery.  On any failure everything already allocated is unwound in
 * reverse order via the goto chain at the bottom.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	/* An explicit thash_entries module parameter overrides the
	 * memory-based heuristic above. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Try progressively smaller allocation orders until one succeeds. */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		/* Trim ehash_size down to a power of two (required by the
		 * hash function's mask-based indexing). */
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	/* Each bucket has a chain for live sockets and a separate
	 * twchain for TIME_WAIT sockets. */
	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* Start the bind table at the same order as the established
	 * table, capping it near 64K buckets below. */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
/* Error unwind: each label releases one resource, then falls through
 * to release everything acquired before it. */
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}
1136
/*
 * dccp_fini - module unload: release everything dccp_init() set up,
 * in roughly reverse order of allocation.
 *
 * NOTE(review): the page orders are recomputed here via get_order()
 * from {b,e}hash_size, whereas dccp_init() allocated with orders
 * chosen before ehash_size was trimmed down to a power of two; if
 * that trim ever crossed an order boundary this would free fewer
 * pages than were allocated.  Verify the two computations always
 * agree (or store the allocation orders).
 */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1151
1152module_init(dccp_init);
1153module_exit(dccp_fini);
1154
1155MODULE_LICENSE("GPL");
1156MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1157MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1158