linux/net/dccp/proto.c
<<
>>
Prefs
   1/*
   2 *  net/dccp/proto.c
   3 *
   4 *  An implementation of the DCCP protocol
   5 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   6 *
   7 *      This program is free software; you can redistribute it and/or modify it
   8 *      under the terms of the GNU General Public License version 2 as
   9 *      published by the Free Software Foundation.
  10 */
  11
  12#include <linux/dccp.h>
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/sched.h>
  16#include <linux/kernel.h>
  17#include <linux/skbuff.h>
  18#include <linux/netdevice.h>
  19#include <linux/in.h>
  20#include <linux/if_arp.h>
  21#include <linux/init.h>
  22#include <linux/random.h>
  23#include <linux/slab.h>
  24#include <net/checksum.h>
  25
  26#include <net/inet_sock.h>
  27#include <net/sock.h>
  28#include <net/xfrm.h>
  29
  30#include <asm/ioctls.h>
  31#include <linux/spinlock.h>
  32#include <linux/timer.h>
  33#include <linux/delay.h>
  34#include <linux/poll.h>
  35
  36#include "ccid.h"
  37#include "dccp.h"
  38#include "feat.h"
  39
  40DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
  41
  42EXPORT_SYMBOL_GPL(dccp_statistics);
  43
  44struct percpu_counter dccp_orphan_count;
  45EXPORT_SYMBOL_GPL(dccp_orphan_count);
  46
  47struct inet_hashinfo dccp_hashinfo;
  48EXPORT_SYMBOL_GPL(dccp_hashinfo);
  49
  50/* the maximum queue length for tx in packets. 0 is no limit */
  51int sysctl_dccp_tx_qlen __read_mostly = 5;
  52
  53#ifdef CONFIG_IP_DCCP_DEBUG
  54static const char *dccp_state_name(const int state)
  55{
  56        static const char *const dccp_state_names[] = {
  57        [DCCP_OPEN]             = "OPEN",
  58        [DCCP_REQUESTING]       = "REQUESTING",
  59        [DCCP_PARTOPEN]         = "PARTOPEN",
  60        [DCCP_LISTEN]           = "LISTEN",
  61        [DCCP_RESPOND]          = "RESPOND",
  62        [DCCP_CLOSING]          = "CLOSING",
  63        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
  64        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
  65        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
  66        [DCCP_TIME_WAIT]        = "TIME_WAIT",
  67        [DCCP_CLOSED]           = "CLOSED",
  68        };
  69
  70        if (state >= DCCP_MAX_STATES)
  71                return "INVALID STATE!";
  72        else
  73                return dccp_state_names[state];
  74}
  75#endif
  76
  77void dccp_set_state(struct sock *sk, const int state)
  78{
  79        const int oldstate = sk->sk_state;
  80
  81        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
  82                      dccp_state_name(oldstate), dccp_state_name(state));
  83        WARN_ON(state == oldstate);
  84
  85        switch (state) {
  86        case DCCP_OPEN:
  87                if (oldstate != DCCP_OPEN)
  88                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
  89                /* Client retransmits all Confirm options until entering OPEN */
  90                if (oldstate == DCCP_PARTOPEN)
  91                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
  92                break;
  93
  94        case DCCP_CLOSED:
  95                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
  96                    oldstate == DCCP_CLOSING)
  97                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
  98
  99                sk->sk_prot->unhash(sk);
 100                if (inet_csk(sk)->icsk_bind_hash != NULL &&
 101                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
 102                        inet_put_port(sk);
 103                /* fall through */
 104        default:
 105                if (oldstate == DCCP_OPEN)
 106                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
 107        }
 108
 109        /* Change state AFTER socket is unhashed to avoid closed
 110         * socket sitting in hash tables.
 111         */
 112        sk->sk_state = state;
 113}
 114
 115EXPORT_SYMBOL_GPL(dccp_set_state);
 116
 117static void dccp_finish_passive_close(struct sock *sk)
 118{
 119        switch (sk->sk_state) {
 120        case DCCP_PASSIVE_CLOSE:
 121                /* Node (client or server) has received Close packet. */
 122                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
 123                dccp_set_state(sk, DCCP_CLOSED);
 124                break;
 125        case DCCP_PASSIVE_CLOSEREQ:
 126                /*
 127                 * Client received CloseReq. We set the `active' flag so that
 128                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
 129                 */
 130                dccp_send_close(sk, 1);
 131                dccp_set_state(sk, DCCP_CLOSING);
 132        }
 133}
 134
 135void dccp_done(struct sock *sk)
 136{
 137        dccp_set_state(sk, DCCP_CLOSED);
 138        dccp_clear_xmit_timers(sk);
 139
 140        sk->sk_shutdown = SHUTDOWN_MASK;
 141
 142        if (!sock_flag(sk, SOCK_DEAD))
 143                sk->sk_state_change(sk);
 144        else
 145                inet_csk_destroy_sock(sk);
 146}
 147
 148EXPORT_SYMBOL_GPL(dccp_done);
 149
 150const char *dccp_packet_name(const int type)
 151{
 152        static const char *const dccp_packet_names[] = {
 153                [DCCP_PKT_REQUEST]  = "REQUEST",
 154                [DCCP_PKT_RESPONSE] = "RESPONSE",
 155                [DCCP_PKT_DATA]     = "DATA",
 156                [DCCP_PKT_ACK]      = "ACK",
 157                [DCCP_PKT_DATAACK]  = "DATAACK",
 158                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
 159                [DCCP_PKT_CLOSE]    = "CLOSE",
 160                [DCCP_PKT_RESET]    = "RESET",
 161                [DCCP_PKT_SYNC]     = "SYNC",
 162                [DCCP_PKT_SYNCACK]  = "SYNCACK",
 163        };
 164
 165        if (type >= DCCP_NR_PKT_TYPES)
 166                return "INVALID";
 167        else
 168                return dccp_packet_names[type];
 169}
 170
 171EXPORT_SYMBOL_GPL(dccp_packet_name);
 172
 173int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 174{
 175        struct dccp_sock *dp = dccp_sk(sk);
 176        struct inet_connection_sock *icsk = inet_csk(sk);
 177
 178        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
 179        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
 180        sk->sk_state            = DCCP_CLOSED;
 181        sk->sk_write_space      = dccp_write_space;
 182        icsk->icsk_sync_mss     = dccp_sync_mss;
 183        dp->dccps_mss_cache     = 536;
 184        dp->dccps_rate_last     = jiffies;
 185        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
 186        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
 187        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
 188        dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
 189
 190        dccp_init_xmit_timers(sk);
 191
 192        INIT_LIST_HEAD(&dp->dccps_featneg);
 193        /* control socket doesn't need feat nego */
 194        if (likely(ctl_sock_initialized))
 195                return dccp_feat_init(sk);
 196        return 0;
 197}
 198
 199EXPORT_SYMBOL_GPL(dccp_init_sock);
 200
 201void dccp_destroy_sock(struct sock *sk)
 202{
 203        struct dccp_sock *dp = dccp_sk(sk);
 204
 205        /*
 206         * DCCP doesn't use sk_write_queue, just sk_send_head
 207         * for retransmissions
 208         */
 209        if (sk->sk_send_head != NULL) {
 210                kfree_skb(sk->sk_send_head);
 211                sk->sk_send_head = NULL;
 212        }
 213
 214        /* Clean up a referenced DCCP bind bucket. */
 215        if (inet_csk(sk)->icsk_bind_hash != NULL)
 216                inet_put_port(sk);
 217
 218        kfree(dp->dccps_service_list);
 219        dp->dccps_service_list = NULL;
 220
 221        if (dp->dccps_hc_rx_ackvec != NULL) {
 222                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 223                dp->dccps_hc_rx_ackvec = NULL;
 224        }
 225        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
 226        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
 227        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 228
 229        /* clean up feature negotiation state */
 230        dccp_feat_list_purge(&dp->dccps_featneg);
 231}
 232
 233EXPORT_SYMBOL_GPL(dccp_destroy_sock);
 234
 235static inline int dccp_listen_start(struct sock *sk, int backlog)
 236{
 237        struct dccp_sock *dp = dccp_sk(sk);
 238
 239        dp->dccps_role = DCCP_ROLE_LISTEN;
 240        /* do not start to listen if feature negotiation setup fails */
 241        if (dccp_feat_finalise_settings(dp))
 242                return -EPROTO;
 243        return inet_csk_listen_start(sk, backlog);
 244}
 245
 246static inline int dccp_need_reset(int state)
 247{
 248        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
 249               state != DCCP_REQUESTING;
 250}
 251
 252int dccp_disconnect(struct sock *sk, int flags)
 253{
 254        struct inet_connection_sock *icsk = inet_csk(sk);
 255        struct inet_sock *inet = inet_sk(sk);
 256        int err = 0;
 257        const int old_state = sk->sk_state;
 258
 259        if (old_state != DCCP_CLOSED)
 260                dccp_set_state(sk, DCCP_CLOSED);
 261
 262        /*
 263         * This corresponds to the ABORT function of RFC793, sec. 3.8
 264         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
 265         */
 266        if (old_state == DCCP_LISTEN) {
 267                inet_csk_listen_stop(sk);
 268        } else if (dccp_need_reset(old_state)) {
 269                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
 270                sk->sk_err = ECONNRESET;
 271        } else if (old_state == DCCP_REQUESTING)
 272                sk->sk_err = ECONNRESET;
 273
 274        dccp_clear_xmit_timers(sk);
 275
 276        __skb_queue_purge(&sk->sk_receive_queue);
 277        __skb_queue_purge(&sk->sk_write_queue);
 278        if (sk->sk_send_head != NULL) {
 279                __kfree_skb(sk->sk_send_head);
 280                sk->sk_send_head = NULL;
 281        }
 282
 283        inet->inet_dport = 0;
 284
 285        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 286                inet_reset_saddr(sk);
 287
 288        sk->sk_shutdown = 0;
 289        sock_reset_flag(sk, SOCK_DONE);
 290
 291        icsk->icsk_backoff = 0;
 292        inet_csk_delack_init(sk);
 293        __sk_dst_reset(sk);
 294
 295        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 296
 297        sk->sk_error_report(sk);
 298        return err;
 299}
 300
 301EXPORT_SYMBOL_GPL(dccp_disconnect);
 302
 303/*
 304 *      Wait for a DCCP event.
 305 *
 306 *      Note that we don't need to lock the socket, as the upper poll layers
 307 *      take care of normal races (between the test and the event) and we don't
 308 *      go look at any of the socket buffers directly.
 309 */
 310unsigned int dccp_poll(struct file *file, struct socket *sock,
 311                       poll_table *wait)
 312{
 313        unsigned int mask;
 314        struct sock *sk = sock->sk;
 315
 316        sock_poll_wait(file, sk_sleep(sk), wait);
 317        if (sk->sk_state == DCCP_LISTEN)
 318                return inet_csk_listen_poll(sk);
 319
 320        /* Socket is not locked. We are protected from async events
 321           by poll logic and correct handling of state changes
 322           made by another threads is impossible in any case.
 323         */
 324
 325        mask = 0;
 326        if (sk->sk_err)
 327                mask = POLLERR;
 328
 329        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
 330                mask |= POLLHUP;
 331        if (sk->sk_shutdown & RCV_SHUTDOWN)
 332                mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 333
 334        /* Connected? */
 335        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
 336                if (atomic_read(&sk->sk_rmem_alloc) > 0)
 337                        mask |= POLLIN | POLLRDNORM;
 338
 339                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
 340                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 341                                mask |= POLLOUT | POLLWRNORM;
 342                        } else {  /* send SIGIO later */
 343                                set_bit(SOCK_ASYNC_NOSPACE,
 344                                        &sk->sk_socket->flags);
 345                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 346
 347                                /* Race breaker. If space is freed after
 348                                 * wspace test but before the flags are set,
 349                                 * IO signal will be lost.
 350                                 */
 351                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 352                                        mask |= POLLOUT | POLLWRNORM;
 353                        }
 354                }
 355        }
 356        return mask;
 357}
 358
 359EXPORT_SYMBOL_GPL(dccp_poll);
 360
 361int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 362{
 363        int rc = -ENOTCONN;
 364
 365        lock_sock(sk);
 366
 367        if (sk->sk_state == DCCP_LISTEN)
 368                goto out;
 369
 370        switch (cmd) {
 371        case SIOCINQ: {
 372                struct sk_buff *skb;
 373                unsigned long amount = 0;
 374
 375                skb = skb_peek(&sk->sk_receive_queue);
 376                if (skb != NULL) {
 377                        /*
 378                         * We will only return the amount of this packet since
 379                         * that is all that will be read.
 380                         */
 381                        amount = skb->len;
 382                }
 383                rc = put_user(amount, (int __user *)arg);
 384        }
 385                break;
 386        default:
 387                rc = -ENOIOCTLCMD;
 388                break;
 389        }
 390out:
 391        release_sock(sk);
 392        return rc;
 393}
 394
 395EXPORT_SYMBOL_GPL(dccp_ioctl);
 396
 397static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 398                                   char __user *optval, unsigned int optlen)
 399{
 400        struct dccp_sock *dp = dccp_sk(sk);
 401        struct dccp_service_list *sl = NULL;
 402
 403        if (service == DCCP_SERVICE_INVALID_VALUE ||
 404            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
 405                return -EINVAL;
 406
 407        if (optlen > sizeof(service)) {
 408                sl = kmalloc(optlen, GFP_KERNEL);
 409                if (sl == NULL)
 410                        return -ENOMEM;
 411
 412                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
 413                if (copy_from_user(sl->dccpsl_list,
 414                                   optval + sizeof(service),
 415                                   optlen - sizeof(service)) ||
 416                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
 417                        kfree(sl);
 418                        return -EFAULT;
 419                }
 420        }
 421
 422        lock_sock(sk);
 423        dp->dccps_service = service;
 424
 425        kfree(dp->dccps_service_list);
 426
 427        dp->dccps_service_list = sl;
 428        release_sock(sk);
 429        return 0;
 430}
 431
 432static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
 433{
 434        u8 *list, len;
 435        int i, rc;
 436
 437        if (cscov < 0 || cscov > 15)
 438                return -EINVAL;
 439        /*
 440         * Populate a list of permissible values, in the range cscov...15. This
 441         * is necessary since feature negotiation of single values only works if
 442         * both sides incidentally choose the same value. Since the list starts
 443         * lowest-value first, negotiation will pick the smallest shared value.
 444         */
 445        if (cscov == 0)
 446                return 0;
 447        len = 16 - cscov;
 448
 449        list = kmalloc(len, GFP_KERNEL);
 450        if (list == NULL)
 451                return -ENOBUFS;
 452
 453        for (i = 0; i < len; i++)
 454                list[i] = cscov++;
 455
 456        rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
 457
 458        if (rc == 0) {
 459                if (rx)
 460                        dccp_sk(sk)->dccps_pcrlen = cscov;
 461                else
 462                        dccp_sk(sk)->dccps_pcslen = cscov;
 463        }
 464        kfree(list);
 465        return rc;
 466}
 467
 468static int dccp_setsockopt_ccid(struct sock *sk, int type,
 469                                char __user *optval, unsigned int optlen)
 470{
 471        u8 *val;
 472        int rc = 0;
 473
 474        if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
 475                return -EINVAL;
 476
 477        val = memdup_user(optval, optlen);
 478        if (IS_ERR(val))
 479                return PTR_ERR(val);
 480
 481        lock_sock(sk);
 482        if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
 483                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
 484
 485        if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
 486                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
 487        release_sock(sk);
 488
 489        kfree(val);
 490        return rc;
 491}
 492
 493static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 494                char __user *optval, unsigned int optlen)
 495{
 496        struct dccp_sock *dp = dccp_sk(sk);
 497        int val, err = 0;
 498
 499        switch (optname) {
 500        case DCCP_SOCKOPT_PACKET_SIZE:
 501                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 502                return 0;
 503        case DCCP_SOCKOPT_CHANGE_L:
 504        case DCCP_SOCKOPT_CHANGE_R:
 505                DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
 506                return 0;
 507        case DCCP_SOCKOPT_CCID:
 508        case DCCP_SOCKOPT_RX_CCID:
 509        case DCCP_SOCKOPT_TX_CCID:
 510                return dccp_setsockopt_ccid(sk, optname, optval, optlen);
 511        }
 512
 513        if (optlen < (int)sizeof(int))
 514                return -EINVAL;
 515
 516        if (get_user(val, (int __user *)optval))
 517                return -EFAULT;
 518
 519        if (optname == DCCP_SOCKOPT_SERVICE)
 520                return dccp_setsockopt_service(sk, val, optval, optlen);
 521
 522        lock_sock(sk);
 523        switch (optname) {
 524        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 525                if (dp->dccps_role != DCCP_ROLE_SERVER)
 526                        err = -EOPNOTSUPP;
 527                else
 528                        dp->dccps_server_timewait = (val != 0);
 529                break;
 530        case DCCP_SOCKOPT_SEND_CSCOV:
 531                err = dccp_setsockopt_cscov(sk, val, false);
 532                break;
 533        case DCCP_SOCKOPT_RECV_CSCOV:
 534                err = dccp_setsockopt_cscov(sk, val, true);
 535                break;
 536        case DCCP_SOCKOPT_QPOLICY_ID:
 537                if (sk->sk_state != DCCP_CLOSED)
 538                        err = -EISCONN;
 539                else if (val < 0 || val >= DCCPQ_POLICY_MAX)
 540                        err = -EINVAL;
 541                else
 542                        dp->dccps_qpolicy = val;
 543                break;
 544        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
 545                if (val < 0)
 546                        err = -EINVAL;
 547                else
 548                        dp->dccps_tx_qlen = val;
 549                break;
 550        default:
 551                err = -ENOPROTOOPT;
 552                break;
 553        }
 554        release_sock(sk);
 555
 556        return err;
 557}
 558
 559int dccp_setsockopt(struct sock *sk, int level, int optname,
 560                    char __user *optval, unsigned int optlen)
 561{
 562        if (level != SOL_DCCP)
 563                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
 564                                                             optname, optval,
 565                                                             optlen);
 566        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 567}
 568
 569EXPORT_SYMBOL_GPL(dccp_setsockopt);
 570
 571#ifdef CONFIG_COMPAT
 572int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
 573                           char __user *optval, unsigned int optlen)
 574{
 575        if (level != SOL_DCCP)
 576                return inet_csk_compat_setsockopt(sk, level, optname,
 577                                                  optval, optlen);
 578        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 579}
 580
 581EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
 582#endif
 583
 584static int dccp_getsockopt_service(struct sock *sk, int len,
 585                                   __be32 __user *optval,
 586                                   int __user *optlen)
 587{
 588        const struct dccp_sock *dp = dccp_sk(sk);
 589        const struct dccp_service_list *sl;
 590        int err = -ENOENT, slen = 0, total_len = sizeof(u32);
 591
 592        lock_sock(sk);
 593        if ((sl = dp->dccps_service_list) != NULL) {
 594                slen = sl->dccpsl_nr * sizeof(u32);
 595                total_len += slen;
 596        }
 597
 598        err = -EINVAL;
 599        if (total_len > len)
 600                goto out;
 601
 602        err = 0;
 603        if (put_user(total_len, optlen) ||
 604            put_user(dp->dccps_service, optval) ||
 605            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
 606                err = -EFAULT;
 607out:
 608        release_sock(sk);
 609        return err;
 610}
 611
 612static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 613                    char __user *optval, int __user *optlen)
 614{
 615        struct dccp_sock *dp;
 616        int val, len;
 617
 618        if (get_user(len, optlen))
 619                return -EFAULT;
 620
 621        if (len < (int)sizeof(int))
 622                return -EINVAL;
 623
 624        dp = dccp_sk(sk);
 625
 626        switch (optname) {
 627        case DCCP_SOCKOPT_PACKET_SIZE:
 628                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 629                return 0;
 630        case DCCP_SOCKOPT_SERVICE:
 631                return dccp_getsockopt_service(sk, len,
 632                                               (__be32 __user *)optval, optlen);
 633        case DCCP_SOCKOPT_GET_CUR_MPS:
 634                val = dp->dccps_mss_cache;
 635                break;
 636        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
 637                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
 638        case DCCP_SOCKOPT_TX_CCID:
 639                val = ccid_get_current_tx_ccid(dp);
 640                if (val < 0)
 641                        return -ENOPROTOOPT;
 642                break;
 643        case DCCP_SOCKOPT_RX_CCID:
 644                val = ccid_get_current_rx_ccid(dp);
 645                if (val < 0)
 646                        return -ENOPROTOOPT;
 647                break;
 648        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 649                val = dp->dccps_server_timewait;
 650                break;
 651        case DCCP_SOCKOPT_SEND_CSCOV:
 652                val = dp->dccps_pcslen;
 653                break;
 654        case DCCP_SOCKOPT_RECV_CSCOV:
 655                val = dp->dccps_pcrlen;
 656                break;
 657        case DCCP_SOCKOPT_QPOLICY_ID:
 658                val = dp->dccps_qpolicy;
 659                break;
 660        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
 661                val = dp->dccps_tx_qlen;
 662                break;
 663        case 128 ... 191:
 664                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 665                                             len, (u32 __user *)optval, optlen);
 666        case 192 ... 255:
 667                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
 668                                             len, (u32 __user *)optval, optlen);
 669        default:
 670                return -ENOPROTOOPT;
 671        }
 672
 673        len = sizeof(val);
 674        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
 675                return -EFAULT;
 676
 677        return 0;
 678}
 679
 680int dccp_getsockopt(struct sock *sk, int level, int optname,
 681                    char __user *optval, int __user *optlen)
 682{
 683        if (level != SOL_DCCP)
 684                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
 685                                                             optname, optval,
 686                                                             optlen);
 687        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 688}
 689
 690EXPORT_SYMBOL_GPL(dccp_getsockopt);
 691
 692#ifdef CONFIG_COMPAT
 693int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 694                           char __user *optval, int __user *optlen)
 695{
 696        if (level != SOL_DCCP)
 697                return inet_csk_compat_getsockopt(sk, level, optname,
 698                                                  optval, optlen);
 699        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 700}
 701
 702EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 703#endif
 704
 705static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
 706{
 707        struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
 708
 709        /*
 710         * Assign an (opaque) qpolicy priority value to skb->priority.
 711         *
 712         * We are overloading this skb field for use with the qpolicy subystem.
 713         * The skb->priority is normally used for the SO_PRIORITY option, which
 714         * is initialised from sk_priority. Since the assignment of sk_priority
 715         * to skb->priority happens later (on layer 3), we overload this field
 716         * for use with queueing priorities as long as the skb is on layer 4.
 717         * The default priority value (if nothing is set) is 0.
 718         */
 719        skb->priority = 0;
 720
 721        for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
 722
 723                if (!CMSG_OK(msg, cmsg))
 724                        return -EINVAL;
 725
 726                if (cmsg->cmsg_level != SOL_DCCP)
 727                        continue;
 728
 729                if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
 730                    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
 731                        return -EINVAL;
 732
 733                switch (cmsg->cmsg_type) {
 734                case DCCP_SCM_PRIORITY:
 735                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
 736                                return -EINVAL;
 737                        skb->priority = *(__u32 *)CMSG_DATA(cmsg);
 738                        break;
 739                default:
 740                        return -EINVAL;
 741                }
 742        }
 743        return 0;
 744}
 745
 746int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 747                 size_t len)
 748{
 749        const struct dccp_sock *dp = dccp_sk(sk);
 750        const int flags = msg->msg_flags;
 751        const int noblock = flags & MSG_DONTWAIT;
 752        struct sk_buff *skb;
 753        int rc, size;
 754        long timeo;
 755
 756        if (len > dp->dccps_mss_cache)
 757                return -EMSGSIZE;
 758
 759        lock_sock(sk);
 760
 761        if (dccp_qpolicy_full(sk)) {
 762                rc = -EAGAIN;
 763                goto out_release;
 764        }
 765
 766        timeo = sock_sndtimeo(sk, noblock);
 767
 768        /*
 769         * We have to use sk_stream_wait_connect here to set sk_write_pending,
 770         * so that the trick in dccp_rcv_request_sent_state_process.
 771         */
 772        /* Wait for a connection to finish. */
 773        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
 774                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
 775                        goto out_release;
 776
 777        size = sk->sk_prot->max_header + len;
 778        release_sock(sk);
 779        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
 780        lock_sock(sk);
 781        if (skb == NULL)
 782                goto out_release;
 783
 784        skb_reserve(skb, sk->sk_prot->max_header);
 785        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 786        if (rc != 0)
 787                goto out_discard;
 788
 789        rc = dccp_msghdr_parse(msg, skb);
 790        if (rc != 0)
 791                goto out_discard;
 792
 793        dccp_qpolicy_push(sk, skb);
 794        /*
 795         * The xmit_timer is set if the TX CCID is rate-based and will expire
 796         * when congestion control permits to release further packets into the
 797         * network. Window-based CCIDs do not use this timer.
 798         */
 799        if (!timer_pending(&dp->dccps_xmit_timer))
 800                dccp_write_xmit(sk);
 801out_release:
 802        release_sock(sk);
 803        return rc ? : len;
 804out_discard:
 805        kfree_skb(skb);
 806        goto out_release;
 807}
 808
 809EXPORT_SYMBOL_GPL(dccp_sendmsg);
 810
 811int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 812                 size_t len, int nonblock, int flags, int *addr_len)
 813{
 814        const struct dccp_hdr *dh;
 815        long timeo;
 816
 817        lock_sock(sk);
 818
 819        if (sk->sk_state == DCCP_LISTEN) {
 820                len = -ENOTCONN;
 821                goto out;
 822        }
 823
 824        timeo = sock_rcvtimeo(sk, nonblock);
 825
 826        do {
 827                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 828
 829                if (skb == NULL)
 830                        goto verify_sock_status;
 831
 832                dh = dccp_hdr(skb);
 833
 834                switch (dh->dccph_type) {
 835                case DCCP_PKT_DATA:
 836                case DCCP_PKT_DATAACK:
 837                        goto found_ok_skb;
 838
 839                case DCCP_PKT_CLOSE:
 840                case DCCP_PKT_CLOSEREQ:
 841                        if (!(flags & MSG_PEEK))
 842                                dccp_finish_passive_close(sk);
 843                        /* fall through */
 844                case DCCP_PKT_RESET:
 845                        dccp_pr_debug("found fin (%s) ok!\n",
 846                                      dccp_packet_name(dh->dccph_type));
 847                        len = 0;
 848                        goto found_fin_ok;
 849                default:
 850                        dccp_pr_debug("packet_type=%s\n",
 851                                      dccp_packet_name(dh->dccph_type));
 852                        sk_eat_skb(sk, skb, 0);
 853                }
 854verify_sock_status:
 855                if (sock_flag(sk, SOCK_DONE)) {
 856                        len = 0;
 857                        break;
 858                }
 859
 860                if (sk->sk_err) {
 861                        len = sock_error(sk);
 862                        break;
 863                }
 864
 865                if (sk->sk_shutdown & RCV_SHUTDOWN) {
 866                        len = 0;
 867                        break;
 868                }
 869
 870                if (sk->sk_state == DCCP_CLOSED) {
 871                        if (!sock_flag(sk, SOCK_DONE)) {
 872                                /* This occurs when user tries to read
 873                                 * from never connected socket.
 874                                 */
 875                                len = -ENOTCONN;
 876                                break;
 877                        }
 878                        len = 0;
 879                        break;
 880                }
 881
 882                if (!timeo) {
 883                        len = -EAGAIN;
 884                        break;
 885                }
 886
 887                if (signal_pending(current)) {
 888                        len = sock_intr_errno(timeo);
 889                        break;
 890                }
 891
 892                sk_wait_data(sk, &timeo);
 893                continue;
 894        found_ok_skb:
 895                if (len > skb->len)
 896                        len = skb->len;
 897                else if (len < skb->len)
 898                        msg->msg_flags |= MSG_TRUNC;
 899
 900                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
 901                        /* Exception. Bailout! */
 902                        len = -EFAULT;
 903                        break;
 904                }
 905                if (flags & MSG_TRUNC)
 906                        len = skb->len;
 907        found_fin_ok:
 908                if (!(flags & MSG_PEEK))
 909                        sk_eat_skb(sk, skb, 0);
 910                break;
 911        } while (1);
 912out:
 913        release_sock(sk);
 914        return len;
 915}
 916
 917EXPORT_SYMBOL_GPL(dccp_recvmsg);
 918
 919int inet_dccp_listen(struct socket *sock, int backlog)
 920{
 921        struct sock *sk = sock->sk;
 922        unsigned char old_state;
 923        int err;
 924
 925        lock_sock(sk);
 926
 927        err = -EINVAL;
 928        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
 929                goto out;
 930
 931        old_state = sk->sk_state;
 932        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
 933                goto out;
 934
 935        /* Really, if the socket is already in listen state
 936         * we can only allow the backlog to be adjusted.
 937         */
 938        if (old_state != DCCP_LISTEN) {
 939                /*
 940                 * FIXME: here it probably should be sk->sk_prot->listen_start
 941                 * see tcp_listen_start
 942                 */
 943                err = dccp_listen_start(sk, backlog);
 944                if (err)
 945                        goto out;
 946        }
 947        sk->sk_max_ack_backlog = backlog;
 948        err = 0;
 949
 950out:
 951        release_sock(sk);
 952        return err;
 953}
 954
 955EXPORT_SYMBOL_GPL(inet_dccp_listen);
 956
 957static void dccp_terminate_connection(struct sock *sk)
 958{
 959        u8 next_state = DCCP_CLOSED;
 960
 961        switch (sk->sk_state) {
 962        case DCCP_PASSIVE_CLOSE:
 963        case DCCP_PASSIVE_CLOSEREQ:
 964                dccp_finish_passive_close(sk);
 965                break;
 966        case DCCP_PARTOPEN:
 967                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
 968                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 969                /* fall through */
 970        case DCCP_OPEN:
 971                dccp_send_close(sk, 1);
 972
 973                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
 974                    !dccp_sk(sk)->dccps_server_timewait)
 975                        next_state = DCCP_ACTIVE_CLOSEREQ;
 976                else
 977                        next_state = DCCP_CLOSING;
 978                /* fall through */
 979        default:
 980                dccp_set_state(sk, next_state);
 981        }
 982}
 983
 984void dccp_close(struct sock *sk, long timeout)
 985{
 986        struct dccp_sock *dp = dccp_sk(sk);
 987        struct sk_buff *skb;
 988        u32 data_was_unread = 0;
 989        int state;
 990
 991        lock_sock(sk);
 992
 993        sk->sk_shutdown = SHUTDOWN_MASK;
 994
 995        if (sk->sk_state == DCCP_LISTEN) {
 996                dccp_set_state(sk, DCCP_CLOSED);
 997
 998                /* Special case. */
 999                inet_csk_listen_stop(sk);
1000
1001                goto adjudge_to_death;
1002        }
1003
1004        sk_stop_timer(sk, &dp->dccps_xmit_timer);
1005
1006        /*
1007         * We need to flush the recv. buffs.  We do this only on the
1008         * descriptor close, not protocol-sourced closes, because the
1009          *reader process may not have drained the data yet!
1010         */
1011        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1012                data_was_unread += skb->len;
1013                __kfree_skb(skb);
1014        }
1015
1016        if (data_was_unread) {
1017                /* Unread data was tossed, send an appropriate Reset Code */
1018                DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1019                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1020                dccp_set_state(sk, DCCP_CLOSED);
1021        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1022                /* Check zero linger _after_ checking for unread data. */
1023                sk->sk_prot->disconnect(sk, 0);
1024        } else if (sk->sk_state != DCCP_CLOSED) {
1025                /*
1026                 * Normal connection termination. May need to wait if there are
1027                 * still packets in the TX queue that are delayed by the CCID.
1028                 */
1029                dccp_flush_write_queue(sk, &timeout);
1030                dccp_terminate_connection(sk);
1031        }
1032
1033        /*
1034         * Flush write queue. This may be necessary in several cases:
1035         * - we have been closed by the peer but still have application data;
1036         * - abortive termination (unread data or zero linger time),
1037         * - normal termination but queue could not be flushed within time limit
1038         */
1039        __skb_queue_purge(&sk->sk_write_queue);
1040
1041        sk_stream_wait_close(sk, timeout);
1042
1043adjudge_to_death:
1044        state = sk->sk_state;
1045        sock_hold(sk);
1046        sock_orphan(sk);
1047
1048        /*
1049         * It is the last release_sock in its life. It will remove backlog.
1050         */
1051        release_sock(sk);
1052        /*
1053         * Now socket is owned by kernel and we acquire BH lock
1054         * to finish close. No need to check for user refs.
1055         */
1056        local_bh_disable();
1057        bh_lock_sock(sk);
1058        WARN_ON(sock_owned_by_user(sk));
1059
1060        percpu_counter_inc(sk->sk_prot->orphan_count);
1061
1062        /* Have we already been destroyed by a softirq or backlog? */
1063        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1064                goto out;
1065
1066        if (sk->sk_state == DCCP_CLOSED)
1067                inet_csk_destroy_sock(sk);
1068
1069        /* Otherwise, socket is reprieved until protocol close. */
1070
1071out:
1072        bh_unlock_sock(sk);
1073        local_bh_enable();
1074        sock_put(sk);
1075}
1076
1077EXPORT_SYMBOL_GPL(dccp_close);
1078
/*
 * dccp_shutdown - shutdown hook for DCCP sockets
 * @sk:  socket the shutdown was requested on (not used here)
 * @how: shutdown mode requested by the caller; only logged
 *
 * Currently a stub: it emits a debug message and takes no other action.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1085
1086static inline int dccp_mib_init(void)
1087{
1088        return snmp_mib_init((void __percpu **)dccp_statistics,
1089                             sizeof(struct dccp_mib),
1090                             __alignof__(struct dccp_mib));
1091}
1092
1093static inline void dccp_mib_exit(void)
1094{
1095        snmp_mib_free((void __percpu **)dccp_statistics);
1096}
1097
1098static int thash_entries;
1099module_param(thash_entries, int, 0444);
1100MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1101
1102#ifdef CONFIG_IP_DCCP_DEBUG
1103int dccp_debug;
1104module_param(dccp_debug, bool, 0644);
1105MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1106
1107EXPORT_SYMBOL_GPL(dccp_debug);
1108#endif
1109
1110static int __init dccp_init(void)
1111{
1112        unsigned long goal;
1113        int ehash_order, bhash_order, i;
1114        int rc;
1115
1116        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1117                     FIELD_SIZEOF(struct sk_buff, cb));
1118        rc = percpu_counter_init(&dccp_orphan_count, 0);
1119        if (rc)
1120                goto out_fail;
1121        rc = -ENOBUFS;
1122        inet_hashinfo_init(&dccp_hashinfo);
1123        dccp_hashinfo.bind_bucket_cachep =
1124                kmem_cache_create("dccp_bind_bucket",
1125                                  sizeof(struct inet_bind_bucket), 0,
1126                                  SLAB_HWCACHE_ALIGN, NULL);
1127        if (!dccp_hashinfo.bind_bucket_cachep)
1128                goto out_free_percpu;
1129
1130        /*
1131         * Size and allocate the main established and bind bucket
1132         * hash tables.
1133         *
1134         * The methodology is similar to that of the buffer cache.
1135         */
1136        if (totalram_pages >= (128 * 1024))
1137                goal = totalram_pages >> (21 - PAGE_SHIFT);
1138        else
1139                goal = totalram_pages >> (23 - PAGE_SHIFT);
1140
1141        if (thash_entries)
1142                goal = (thash_entries *
1143                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1144        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1145                ;
1146        do {
1147                unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1148                                        sizeof(struct inet_ehash_bucket);
1149
1150                while (hash_size & (hash_size - 1))
1151                        hash_size--;
1152                dccp_hashinfo.ehash_mask = hash_size - 1;
1153                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1154                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1155        } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1156
1157        if (!dccp_hashinfo.ehash) {
1158                DCCP_CRIT("Failed to allocate DCCP established hash table");
1159                goto out_free_bind_bucket_cachep;
1160        }
1161
1162        for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
1163                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1164                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
1165        }
1166
1167        if (inet_ehash_locks_alloc(&dccp_hashinfo))
1168                        goto out_free_dccp_ehash;
1169
1170        bhash_order = ehash_order;
1171
1172        do {
1173                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1174                                        sizeof(struct inet_bind_hashbucket);
1175                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1176                    bhash_order > 0)
1177                        continue;
1178                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1179                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1180        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1181
1182        if (!dccp_hashinfo.bhash) {
1183                DCCP_CRIT("Failed to allocate DCCP bind hash table");
1184                goto out_free_dccp_locks;
1185        }
1186
1187        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1188                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1189                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1190        }
1191
1192        rc = dccp_mib_init();
1193        if (rc)
1194                goto out_free_dccp_bhash;
1195
1196        rc = dccp_ackvec_init();
1197        if (rc)
1198                goto out_free_dccp_mib;
1199
1200        rc = dccp_sysctl_init();
1201        if (rc)
1202                goto out_ackvec_exit;
1203
1204        rc = ccid_initialize_builtins();
1205        if (rc)
1206                goto out_sysctl_exit;
1207
1208        dccp_timestamping_init();
1209
1210        return 0;
1211
1212out_sysctl_exit:
1213        dccp_sysctl_exit();
1214out_ackvec_exit:
1215        dccp_ackvec_exit();
1216out_free_dccp_mib:
1217        dccp_mib_exit();
1218out_free_dccp_bhash:
1219        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1220out_free_dccp_locks:
1221        inet_ehash_locks_free(&dccp_hashinfo);
1222out_free_dccp_ehash:
1223        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1224out_free_bind_bucket_cachep:
1225        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1226out_free_percpu:
1227        percpu_counter_destroy(&dccp_orphan_count);
1228out_fail:
1229        dccp_hashinfo.bhash = NULL;
1230        dccp_hashinfo.ehash = NULL;
1231        dccp_hashinfo.bind_bucket_cachep = NULL;
1232        return rc;
1233}
1234
1235static void __exit dccp_fini(void)
1236{
1237        ccid_cleanup_builtins();
1238        dccp_mib_exit();
1239        free_pages((unsigned long)dccp_hashinfo.bhash,
1240                   get_order(dccp_hashinfo.bhash_size *
1241                             sizeof(struct inet_bind_hashbucket)));
1242        free_pages((unsigned long)dccp_hashinfo.ehash,
1243                   get_order((dccp_hashinfo.ehash_mask + 1) *
1244                             sizeof(struct inet_ehash_bucket)));
1245        inet_ehash_locks_free(&dccp_hashinfo);
1246        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1247        dccp_ackvec_exit();
1248        dccp_sysctl_exit();
1249        percpu_counter_destroy(&dccp_orphan_count);
1250}
1251
1252module_init(dccp_init);
1253module_exit(dccp_fini);
1254
1255MODULE_LICENSE("GPL");
1256MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1257MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1258