linux/net/dccp/proto.c
<<
>>
Prefs
   1/*
   2 *  net/dccp/proto.c
   3 *
   4 *  An implementation of the DCCP protocol
   5 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   6 *
   7 *      This program is free software; you can redistribute it and/or modify it
   8 *      under the terms of the GNU General Public License version 2 as
   9 *      published by the Free Software Foundation.
  10 */
  11
  12#include <linux/dccp.h>
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/sched.h>
  16#include <linux/kernel.h>
  17#include <linux/skbuff.h>
  18#include <linux/netdevice.h>
  19#include <linux/in.h>
  20#include <linux/if_arp.h>
  21#include <linux/init.h>
  22#include <linux/random.h>
  23#include <linux/slab.h>
  24#include <net/checksum.h>
  25
  26#include <net/inet_sock.h>
  27#include <net/sock.h>
  28#include <net/xfrm.h>
  29
  30#include <asm/ioctls.h>
  31#include <linux/spinlock.h>
  32#include <linux/timer.h>
  33#include <linux/delay.h>
  34#include <linux/poll.h>
  35
  36#include "ccid.h"
  37#include "dccp.h"
  38#include "feat.h"
  39
/*
 * Storage for the DCCP MIB statistics.
 * NOTE(review): presumably this is what the DCCP_INC_STATS()/DCCP_DEC_STATS()
 * macros used further down operate on - confirm against dccp.h.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * NOTE(review): presumably the number of orphaned DCCP sockets; it is not
 * referenced in the part of the file visible here - confirm.
 */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * NOTE(review): presumably the socket lookup tables shared by the DCCP
 * address-family modules; not referenced in the visible part - confirm.
 */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * dccp_init_sock() copies this into dccps_tx_qlen of every new socket.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
  52
  53#ifdef CONFIG_IP_DCCP_DEBUG
  54static const char *dccp_state_name(const int state)
  55{
  56        static const char *const dccp_state_names[] = {
  57        [DCCP_OPEN]             = "OPEN",
  58        [DCCP_REQUESTING]       = "REQUESTING",
  59        [DCCP_PARTOPEN]         = "PARTOPEN",
  60        [DCCP_LISTEN]           = "LISTEN",
  61        [DCCP_RESPOND]          = "RESPOND",
  62        [DCCP_CLOSING]          = "CLOSING",
  63        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
  64        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
  65        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
  66        [DCCP_TIME_WAIT]        = "TIME_WAIT",
  67        [DCCP_CLOSED]           = "CLOSED",
  68        };
  69
  70        if (state >= DCCP_MAX_STATES)
  71                return "INVALID STATE!";
  72        else
  73                return dccp_state_names[state];
  74}
  75#endif
  76
/*
 * dccp_set_state  -  move a socket into a new DCCP state
 * @sk:    socket whose sk_state is updated
 * @state: state to enter
 *
 * Besides storing @state in sk->sk_state, this maintains the CURRESTAB and
 * ESTABRESETS MIB counters and, when entering DCCP_CLOSED, unhashes the
 * socket and may release its local port. A warning is raised when @state
 * equals the current state.
 */
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                /* Count the connection as established on first entry only */
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                /* Client retransmits all Confirm options until entering OPEN */
                if (oldstate == DCCP_PARTOPEN)
                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
                break;

        case DCCP_CLOSED:
                /* Going to CLOSED from one of these states counts as a reset */
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                /*
                 * Give the port back if one is bound, unless
                 * SOCK_BINDPORT_LOCK is set in sk_userlocks.
                 */
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                /* fall through */
        default:
                /* Leaving OPEN (for CLOSED or any other state): one less established */
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}
 114
 115EXPORT_SYMBOL_GPL(dccp_set_state);
 116
 117static void dccp_finish_passive_close(struct sock *sk)
 118{
 119        switch (sk->sk_state) {
 120        case DCCP_PASSIVE_CLOSE:
 121                /* Node (client or server) has received Close packet. */
 122                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
 123                dccp_set_state(sk, DCCP_CLOSED);
 124                break;
 125        case DCCP_PASSIVE_CLOSEREQ:
 126                /*
 127                 * Client received CloseReq. We set the `active' flag so that
 128                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
 129                 */
 130                dccp_send_close(sk, 1);
 131                dccp_set_state(sk, DCCP_CLOSING);
 132        }
 133}
 134
 135void dccp_done(struct sock *sk)
 136{
 137        dccp_set_state(sk, DCCP_CLOSED);
 138        dccp_clear_xmit_timers(sk);
 139
 140        sk->sk_shutdown = SHUTDOWN_MASK;
 141
 142        if (!sock_flag(sk, SOCK_DEAD))
 143                sk->sk_state_change(sk);
 144        else
 145                inet_csk_destroy_sock(sk);
 146}
 147
 148EXPORT_SYMBOL_GPL(dccp_done);
 149
 150const char *dccp_packet_name(const int type)
 151{
 152        static const char *const dccp_packet_names[] = {
 153                [DCCP_PKT_REQUEST]  = "REQUEST",
 154                [DCCP_PKT_RESPONSE] = "RESPONSE",
 155                [DCCP_PKT_DATA]     = "DATA",
 156                [DCCP_PKT_ACK]      = "ACK",
 157                [DCCP_PKT_DATAACK]  = "DATAACK",
 158                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
 159                [DCCP_PKT_CLOSE]    = "CLOSE",
 160                [DCCP_PKT_RESET]    = "RESET",
 161                [DCCP_PKT_SYNC]     = "SYNC",
 162                [DCCP_PKT_SYNCACK]  = "SYNCACK",
 163        };
 164
 165        if (type >= DCCP_NR_PKT_TYPES)
 166                return "INVALID";
 167        else
 168                return dccp_packet_names[type];
 169}
 170
 171EXPORT_SYMBOL_GPL(dccp_packet_name);
 172
 173int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 174{
 175        struct dccp_sock *dp = dccp_sk(sk);
 176        struct inet_connection_sock *icsk = inet_csk(sk);
 177
 178        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
 179        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
 180        sk->sk_state            = DCCP_CLOSED;
 181        sk->sk_write_space      = dccp_write_space;
 182        icsk->icsk_sync_mss     = dccp_sync_mss;
 183        dp->dccps_mss_cache     = 536;
 184        dp->dccps_rate_last     = jiffies;
 185        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
 186        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
 187        dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
 188
 189        dccp_init_xmit_timers(sk);
 190
 191        INIT_LIST_HEAD(&dp->dccps_featneg);
 192        /* control socket doesn't need feat nego */
 193        if (likely(ctl_sock_initialized))
 194                return dccp_feat_init(sk);
 195        return 0;
 196}
 197
 198EXPORT_SYMBOL_GPL(dccp_init_sock);
 199
 200void dccp_destroy_sock(struct sock *sk)
 201{
 202        struct dccp_sock *dp = dccp_sk(sk);
 203
 204        /*
 205         * DCCP doesn't use sk_write_queue, just sk_send_head
 206         * for retransmissions
 207         */
 208        if (sk->sk_send_head != NULL) {
 209                kfree_skb(sk->sk_send_head);
 210                sk->sk_send_head = NULL;
 211        }
 212
 213        /* Clean up a referenced DCCP bind bucket. */
 214        if (inet_csk(sk)->icsk_bind_hash != NULL)
 215                inet_put_port(sk);
 216
 217        kfree(dp->dccps_service_list);
 218        dp->dccps_service_list = NULL;
 219
 220        if (dp->dccps_hc_rx_ackvec != NULL) {
 221                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 222                dp->dccps_hc_rx_ackvec = NULL;
 223        }
 224        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
 225        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
 226        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 227
 228        /* clean up feature negotiation state */
 229        dccp_feat_list_purge(&dp->dccps_featneg);
 230}
 231
 232EXPORT_SYMBOL_GPL(dccp_destroy_sock);
 233
 234static inline int dccp_listen_start(struct sock *sk, int backlog)
 235{
 236        struct dccp_sock *dp = dccp_sk(sk);
 237
 238        dp->dccps_role = DCCP_ROLE_LISTEN;
 239        /* do not start to listen if feature negotiation setup fails */
 240        if (dccp_feat_finalise_settings(dp))
 241                return -EPROTO;
 242        return inet_csk_listen_start(sk, backlog);
 243}
 244
 245static inline int dccp_need_reset(int state)
 246{
 247        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
 248               state != DCCP_REQUESTING;
 249}
 250
/*
 * dccp_disconnect  -  abort the connection and make the socket reusable
 * @sk:    socket to disconnect
 * @flags: not referenced in this function
 *
 * Moves the socket to DCCP_CLOSED, stops listening or sends a Reset as
 * appropriate for the previous state, drops all queued packets and resets
 * the per-connection fields. Always returns 0 (err is never changed).
 */
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        /* dccp_set_state() warns on a no-op transition, so skip it if already closed */
        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                /* no Reset is sent from REQUESTING, but the error is still reported */
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);

        /* Discard everything still queued, including the retransmission skb */
        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->inet_dport = 0;

        /* Forget the source address unless SOCK_BINDADDR_LOCK is set */
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        /* A local port number without a bind bucket would be inconsistent */
        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}
 299
 300EXPORT_SYMBOL_GPL(dccp_disconnect);
 301
 302/*
 303 *      Wait for a DCCP event.
 304 *
 305 *      Note that we don't need to lock the socket, as the upper poll layers
 306 *      take care of normal races (between the test and the event) and we don't
 307 *      go look at any of the socket buffers directly.
 308 */
/*
 * dccp_poll  -  compute the poll event mask of a DCCP socket
 * @file: file the poll was issued on, passed to sock_poll_wait()
 * @sock: socket being polled
 * @wait: poll table, passed to sock_poll_wait()
 *
 * Returns a mask of POLL* bits. A listening socket is delegated to
 * inet_csk_listen_poll(). The socket is not locked here.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        sock_poll_wait(file, sk_sleep(sk), wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by poll logic and correct handling of state changes
           made by another threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        /* Hang-up: both directions shut down, or the socket is closed */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        /* A shut-down receive side is reported as readable plus POLLRDHUP */
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? (any state other than REQUESTING and RESPOND) */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                /* Readable as soon as any receive memory is charged to the socket */
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_is_writeable(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_is_writeable(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}
 356
 357EXPORT_SYMBOL_GPL(dccp_poll);
 358
 359int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 360{
 361        int rc = -ENOTCONN;
 362
 363        lock_sock(sk);
 364
 365        if (sk->sk_state == DCCP_LISTEN)
 366                goto out;
 367
 368        switch (cmd) {
 369        case SIOCINQ: {
 370                struct sk_buff *skb;
 371                unsigned long amount = 0;
 372
 373                skb = skb_peek(&sk->sk_receive_queue);
 374                if (skb != NULL) {
 375                        /*
 376                         * We will only return the amount of this packet since
 377                         * that is all that will be read.
 378                         */
 379                        amount = skb->len;
 380                }
 381                rc = put_user(amount, (int __user *)arg);
 382        }
 383                break;
 384        default:
 385                rc = -ENOIOCTLCMD;
 386                break;
 387        }
 388out:
 389        release_sock(sk);
 390        return rc;
 391}
 392
 393EXPORT_SYMBOL_GPL(dccp_ioctl);
 394
 395static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 396                                   char __user *optval, unsigned int optlen)
 397{
 398        struct dccp_sock *dp = dccp_sk(sk);
 399        struct dccp_service_list *sl = NULL;
 400
 401        if (service == DCCP_SERVICE_INVALID_VALUE ||
 402            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
 403                return -EINVAL;
 404
 405        if (optlen > sizeof(service)) {
 406                sl = kmalloc(optlen, GFP_KERNEL);
 407                if (sl == NULL)
 408                        return -ENOMEM;
 409
 410                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
 411                if (copy_from_user(sl->dccpsl_list,
 412                                   optval + sizeof(service),
 413                                   optlen - sizeof(service)) ||
 414                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
 415                        kfree(sl);
 416                        return -EFAULT;
 417                }
 418        }
 419
 420        lock_sock(sk);
 421        dp->dccps_service = service;
 422
 423        kfree(dp->dccps_service_list);
 424
 425        dp->dccps_service_list = sl;
 426        release_sock(sk);
 427        return 0;
 428}
 429
 430static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
 431{
 432        u8 *list, len;
 433        int i, rc;
 434
 435        if (cscov < 0 || cscov > 15)
 436                return -EINVAL;
 437        /*
 438         * Populate a list of permissible values, in the range cscov...15. This
 439         * is necessary since feature negotiation of single values only works if
 440         * both sides incidentally choose the same value. Since the list starts
 441         * lowest-value first, negotiation will pick the smallest shared value.
 442         */
 443        if (cscov == 0)
 444                return 0;
 445        len = 16 - cscov;
 446
 447        list = kmalloc(len, GFP_KERNEL);
 448        if (list == NULL)
 449                return -ENOBUFS;
 450
 451        for (i = 0; i < len; i++)
 452                list[i] = cscov++;
 453
 454        rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
 455
 456        if (rc == 0) {
 457                if (rx)
 458                        dccp_sk(sk)->dccps_pcrlen = cscov;
 459                else
 460                        dccp_sk(sk)->dccps_pcslen = cscov;
 461        }
 462        kfree(list);
 463        return rc;
 464}
 465
 466static int dccp_setsockopt_ccid(struct sock *sk, int type,
 467                                char __user *optval, unsigned int optlen)
 468{
 469        u8 *val;
 470        int rc = 0;
 471
 472        if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
 473                return -EINVAL;
 474
 475        val = memdup_user(optval, optlen);
 476        if (IS_ERR(val))
 477                return PTR_ERR(val);
 478
 479        lock_sock(sk);
 480        if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
 481                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
 482
 483        if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
 484                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
 485        release_sock(sk);
 486
 487        kfree(val);
 488        return rc;
 489}
 490
 491static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 492                char __user *optval, unsigned int optlen)
 493{
 494        struct dccp_sock *dp = dccp_sk(sk);
 495        int val, err = 0;
 496
 497        switch (optname) {
 498        case DCCP_SOCKOPT_PACKET_SIZE:
 499                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 500                return 0;
 501        case DCCP_SOCKOPT_CHANGE_L:
 502        case DCCP_SOCKOPT_CHANGE_R:
 503                DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
 504                return 0;
 505        case DCCP_SOCKOPT_CCID:
 506        case DCCP_SOCKOPT_RX_CCID:
 507        case DCCP_SOCKOPT_TX_CCID:
 508                return dccp_setsockopt_ccid(sk, optname, optval, optlen);
 509        }
 510
 511        if (optlen < (int)sizeof(int))
 512                return -EINVAL;
 513
 514        if (get_user(val, (int __user *)optval))
 515                return -EFAULT;
 516
 517        if (optname == DCCP_SOCKOPT_SERVICE)
 518                return dccp_setsockopt_service(sk, val, optval, optlen);
 519
 520        lock_sock(sk);
 521        switch (optname) {
 522        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 523                if (dp->dccps_role != DCCP_ROLE_SERVER)
 524                        err = -EOPNOTSUPP;
 525                else
 526                        dp->dccps_server_timewait = (val != 0);
 527                break;
 528        case DCCP_SOCKOPT_SEND_CSCOV:
 529                err = dccp_setsockopt_cscov(sk, val, false);
 530                break;
 531        case DCCP_SOCKOPT_RECV_CSCOV:
 532                err = dccp_setsockopt_cscov(sk, val, true);
 533                break;
 534        case DCCP_SOCKOPT_QPOLICY_ID:
 535                if (sk->sk_state != DCCP_CLOSED)
 536                        err = -EISCONN;
 537                else if (val < 0 || val >= DCCPQ_POLICY_MAX)
 538                        err = -EINVAL;
 539                else
 540                        dp->dccps_qpolicy = val;
 541                break;
 542        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
 543                if (val < 0)
 544                        err = -EINVAL;
 545                else
 546                        dp->dccps_tx_qlen = val;
 547                break;
 548        default:
 549                err = -ENOPROTOOPT;
 550                break;
 551        }
 552        release_sock(sk);
 553
 554        return err;
 555}
 556
 557int dccp_setsockopt(struct sock *sk, int level, int optname,
 558                    char __user *optval, unsigned int optlen)
 559{
 560        if (level != SOL_DCCP)
 561                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
 562                                                             optname, optval,
 563                                                             optlen);
 564        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 565}
 566
 567EXPORT_SYMBOL_GPL(dccp_setsockopt);
 568
 569#ifdef CONFIG_COMPAT
 570int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
 571                           char __user *optval, unsigned int optlen)
 572{
 573        if (level != SOL_DCCP)
 574                return inet_csk_compat_setsockopt(sk, level, optname,
 575                                                  optval, optlen);
 576        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 577}
 578
 579EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
 580#endif
 581
 582static int dccp_getsockopt_service(struct sock *sk, int len,
 583                                   __be32 __user *optval,
 584                                   int __user *optlen)
 585{
 586        const struct dccp_sock *dp = dccp_sk(sk);
 587        const struct dccp_service_list *sl;
 588        int err = -ENOENT, slen = 0, total_len = sizeof(u32);
 589
 590        lock_sock(sk);
 591        if ((sl = dp->dccps_service_list) != NULL) {
 592                slen = sl->dccpsl_nr * sizeof(u32);
 593                total_len += slen;
 594        }
 595
 596        err = -EINVAL;
 597        if (total_len > len)
 598                goto out;
 599
 600        err = 0;
 601        if (put_user(total_len, optlen) ||
 602            put_user(dp->dccps_service, optval) ||
 603            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
 604                err = -EFAULT;
 605out:
 606        release_sock(sk);
 607        return err;
 608}
 609
 610static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 611                    char __user *optval, int __user *optlen)
 612{
 613        struct dccp_sock *dp;
 614        int val, len;
 615
 616        if (get_user(len, optlen))
 617                return -EFAULT;
 618
 619        if (len < (int)sizeof(int))
 620                return -EINVAL;
 621
 622        dp = dccp_sk(sk);
 623
 624        switch (optname) {
 625        case DCCP_SOCKOPT_PACKET_SIZE:
 626                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 627                return 0;
 628        case DCCP_SOCKOPT_SERVICE:
 629                return dccp_getsockopt_service(sk, len,
 630                                               (__be32 __user *)optval, optlen);
 631        case DCCP_SOCKOPT_GET_CUR_MPS:
 632                val = dp->dccps_mss_cache;
 633                break;
 634        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
 635                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
 636        case DCCP_SOCKOPT_TX_CCID:
 637                val = ccid_get_current_tx_ccid(dp);
 638                if (val < 0)
 639                        return -ENOPROTOOPT;
 640                break;
 641        case DCCP_SOCKOPT_RX_CCID:
 642                val = ccid_get_current_rx_ccid(dp);
 643                if (val < 0)
 644                        return -ENOPROTOOPT;
 645                break;
 646        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 647                val = dp->dccps_server_timewait;
 648                break;
 649        case DCCP_SOCKOPT_SEND_CSCOV:
 650                val = dp->dccps_pcslen;
 651                break;
 652        case DCCP_SOCKOPT_RECV_CSCOV:
 653                val = dp->dccps_pcrlen;
 654                break;
 655        case DCCP_SOCKOPT_QPOLICY_ID:
 656                val = dp->dccps_qpolicy;
 657                break;
 658        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
 659                val = dp->dccps_tx_qlen;
 660                break;
 661        case 128 ... 191:
 662                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 663                                             len, (u32 __user *)optval, optlen);
 664        case 192 ... 255:
 665                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
 666                                             len, (u32 __user *)optval, optlen);
 667        default:
 668                return -ENOPROTOOPT;
 669        }
 670
 671        len = sizeof(val);
 672        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
 673                return -EFAULT;
 674
 675        return 0;
 676}
 677
 678int dccp_getsockopt(struct sock *sk, int level, int optname,
 679                    char __user *optval, int __user *optlen)
 680{
 681        if (level != SOL_DCCP)
 682                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
 683                                                             optname, optval,
 684                                                             optlen);
 685        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 686}
 687
 688EXPORT_SYMBOL_GPL(dccp_getsockopt);
 689
 690#ifdef CONFIG_COMPAT
 691int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 692                           char __user *optval, int __user *optlen)
 693{
 694        if (level != SOL_DCCP)
 695                return inet_csk_compat_getsockopt(sk, level, optname,
 696                                                  optval, optlen);
 697        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 698}
 699
 700EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 701#endif
 702
 703static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
 704{
 705        struct cmsghdr *cmsg;
 706
 707        /*
 708         * Assign an (opaque) qpolicy priority value to skb->priority.
 709         *
 710         * We are overloading this skb field for use with the qpolicy subystem.
 711         * The skb->priority is normally used for the SO_PRIORITY option, which
 712         * is initialised from sk_priority. Since the assignment of sk_priority
 713         * to skb->priority happens later (on layer 3), we overload this field
 714         * for use with queueing priorities as long as the skb is on layer 4.
 715         * The default priority value (if nothing is set) is 0.
 716         */
 717        skb->priority = 0;
 718
 719        for_each_cmsghdr(cmsg, msg) {
 720                if (!CMSG_OK(msg, cmsg))
 721                        return -EINVAL;
 722
 723                if (cmsg->cmsg_level != SOL_DCCP)
 724                        continue;
 725
 726                if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
 727                    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
 728                        return -EINVAL;
 729
 730                switch (cmsg->cmsg_type) {
 731                case DCCP_SCM_PRIORITY:
 732                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
 733                                return -EINVAL;
 734                        skb->priority = *(__u32 *)CMSG_DATA(cmsg);
 735                        break;
 736                default:
 737                        return -EINVAL;
 738                }
 739        }
 740        return 0;
 741}
 742
 743int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 744{
 745        const struct dccp_sock *dp = dccp_sk(sk);
 746        const int flags = msg->msg_flags;
 747        const int noblock = flags & MSG_DONTWAIT;
 748        struct sk_buff *skb;
 749        int rc, size;
 750        long timeo;
 751
 752        if (len > dp->dccps_mss_cache)
 753                return -EMSGSIZE;
 754
 755        lock_sock(sk);
 756
 757        if (dccp_qpolicy_full(sk)) {
 758                rc = -EAGAIN;
 759                goto out_release;
 760        }
 761
 762        timeo = sock_sndtimeo(sk, noblock);
 763
 764        /*
 765         * We have to use sk_stream_wait_connect here to set sk_write_pending,
 766         * so that the trick in dccp_rcv_request_sent_state_process.
 767         */
 768        /* Wait for a connection to finish. */
 769        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
 770                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
 771                        goto out_release;
 772
 773        size = sk->sk_prot->max_header + len;
 774        release_sock(sk);
 775        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
 776        lock_sock(sk);
 777        if (skb == NULL)
 778                goto out_release;
 779
 780        skb_reserve(skb, sk->sk_prot->max_header);
 781        rc = memcpy_from_msg(skb_put(skb, len), msg, len);
 782        if (rc != 0)
 783                goto out_discard;
 784
 785        rc = dccp_msghdr_parse(msg, skb);
 786        if (rc != 0)
 787                goto out_discard;
 788
 789        dccp_qpolicy_push(sk, skb);
 790        /*
 791         * The xmit_timer is set if the TX CCID is rate-based and will expire
 792         * when congestion control permits to release further packets into the
 793         * network. Window-based CCIDs do not use this timer.
 794         */
 795        if (!timer_pending(&dp->dccps_xmit_timer))
 796                dccp_write_xmit(sk);
 797out_release:
 798        release_sock(sk);
 799        return rc ? : len;
 800out_discard:
 801        kfree_skb(skb);
 802        goto out_release;
 803}
 804
 805EXPORT_SYMBOL_GPL(dccp_sendmsg);
 806
/*
 * dccp_recvmsg  -  receive one datagram from a DCCP socket
 * @sk:       socket to read from
 * @msg:      message header receiving the payload
 * @len:      size of the receive buffer in bytes
 * @nonblock: passed to sock_rcvtimeo() to determine the receive timeout
 * @flags:    MSG_* flags; MSG_PEEK and MSG_TRUNC are examined here
 * @addr_len: not referenced in this function
 *
 * At most one packet is consumed per call. Returns the number of bytes
 * copied (or the full packet length when MSG_TRUNC is set in @flags),
 * 0 when a Close, CloseReq or Reset packet is found or the receive side is
 * finished, or a negative errno. Note that errno values are stored in the
 * size_t variable @len and converted back by the int return type.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                 int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                /* Look at the head of the queue without dequeuing it */
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* Peeking must not change the connection state */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* Any other packet type is dropped from the queue */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb);
                }
verify_sock_status:
                /* Nothing to deliver: find out whether waiting makes sense */
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* A zero timeout means the caller does not want to wait */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo, NULL);
                continue;
        found_ok_skb:
                /* Clamp to the packet size; flag truncation if the buffer is smaller */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_msg(skb, 0, msg, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
                /* With MSG_TRUNC the caller is told the real packet length */
                if (flags & MSG_TRUNC)
                        len = skb->len;
        found_fin_ok:
                /* The packet stays queued when the caller is only peeking */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
 912
 913EXPORT_SYMBOL_GPL(dccp_recvmsg);
 914
 915int inet_dccp_listen(struct socket *sock, int backlog)
 916{
 917        struct sock *sk = sock->sk;
 918        unsigned char old_state;
 919        int err;
 920
 921        lock_sock(sk);
 922
 923        err = -EINVAL;
 924        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
 925                goto out;
 926
 927        old_state = sk->sk_state;
 928        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
 929                goto out;
 930
 931        /* Really, if the socket is already in listen state
 932         * we can only allow the backlog to be adjusted.
 933         */
 934        if (old_state != DCCP_LISTEN) {
 935                /*
 936                 * FIXME: here it probably should be sk->sk_prot->listen_start
 937                 * see tcp_listen_start
 938                 */
 939                err = dccp_listen_start(sk, backlog);
 940                if (err)
 941                        goto out;
 942        }
 943        sk->sk_max_ack_backlog = backlog;
 944        err = 0;
 945
 946out:
 947        release_sock(sk);
 948        return err;
 949}
 950
 951EXPORT_SYMBOL_GPL(inet_dccp_listen);
 952
 953static void dccp_terminate_connection(struct sock *sk)
 954{
 955        u8 next_state = DCCP_CLOSED;
 956
 957        switch (sk->sk_state) {
 958        case DCCP_PASSIVE_CLOSE:
 959        case DCCP_PASSIVE_CLOSEREQ:
 960                dccp_finish_passive_close(sk);
 961                break;
 962        case DCCP_PARTOPEN:
 963                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
 964                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 965                /* fall through */
 966        case DCCP_OPEN:
 967                dccp_send_close(sk, 1);
 968
 969                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
 970                    !dccp_sk(sk)->dccps_server_timewait)
 971                        next_state = DCCP_ACTIVE_CLOSEREQ;
 972                else
 973                        next_state = DCCP_CLOSING;
 974                /* fall through */
 975        default:
 976                dccp_set_state(sk, next_state);
 977        }
 978}
 979
 980void dccp_close(struct sock *sk, long timeout)
 981{
 982        struct dccp_sock *dp = dccp_sk(sk);
 983        struct sk_buff *skb;
 984        u32 data_was_unread = 0;
 985        int state;
 986
 987        lock_sock(sk);
 988
 989        sk->sk_shutdown = SHUTDOWN_MASK;
 990
 991        if (sk->sk_state == DCCP_LISTEN) {
 992                dccp_set_state(sk, DCCP_CLOSED);
 993
 994                /* Special case. */
 995                inet_csk_listen_stop(sk);
 996
 997                goto adjudge_to_death;
 998        }
 999
1000        sk_stop_timer(sk, &dp->dccps_xmit_timer);
1001
1002        /*
1003         * We need to flush the recv. buffs.  We do this only on the
1004         * descriptor close, not protocol-sourced closes, because the
1005          *reader process may not have drained the data yet!
1006         */
1007        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1008                data_was_unread += skb->len;
1009                __kfree_skb(skb);
1010        }
1011
1012        /* If socket has been already reset kill it. */
1013        if (sk->sk_state == DCCP_CLOSED)
1014                goto adjudge_to_death;
1015
1016        if (data_was_unread) {
1017                /* Unread data was tossed, send an appropriate Reset Code */
1018                DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1019                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1020                dccp_set_state(sk, DCCP_CLOSED);
1021        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1022                /* Check zero linger _after_ checking for unread data. */
1023                sk->sk_prot->disconnect(sk, 0);
1024        } else if (sk->sk_state != DCCP_CLOSED) {
1025                /*
1026                 * Normal connection termination. May need to wait if there are
1027                 * still packets in the TX queue that are delayed by the CCID.
1028                 */
1029                dccp_flush_write_queue(sk, &timeout);
1030                dccp_terminate_connection(sk);
1031        }
1032
1033        /*
1034         * Flush write queue. This may be necessary in several cases:
1035         * - we have been closed by the peer but still have application data;
1036         * - abortive termination (unread data or zero linger time),
1037         * - normal termination but queue could not be flushed within time limit
1038         */
1039        __skb_queue_purge(&sk->sk_write_queue);
1040
1041        sk_stream_wait_close(sk, timeout);
1042
1043adjudge_to_death:
1044        state = sk->sk_state;
1045        sock_hold(sk);
1046        sock_orphan(sk);
1047
1048        /*
1049         * It is the last release_sock in its life. It will remove backlog.
1050         */
1051        release_sock(sk);
1052        /*
1053         * Now socket is owned by kernel and we acquire BH lock
1054         * to finish close. No need to check for user refs.
1055         */
1056        local_bh_disable();
1057        bh_lock_sock(sk);
1058        WARN_ON(sock_owned_by_user(sk));
1059
1060        percpu_counter_inc(sk->sk_prot->orphan_count);
1061
1062        /* Have we already been destroyed by a softirq or backlog? */
1063        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1064                goto out;
1065
1066        if (sk->sk_state == DCCP_CLOSED)
1067                inet_csk_destroy_sock(sk);
1068
1069        /* Otherwise, socket is reprieved until protocol close. */
1070
1071out:
1072        bh_unlock_sock(sk);
1073        local_bh_enable();
1074        sock_put(sk);
1075}
1076
1077EXPORT_SYMBOL_GPL(dccp_close);
1078
/**
 * dccp_shutdown - shutdown handler for DCCP sockets
 * @sk: socket the request refers to (not used)
 * @how: shutdown mode requested by the caller
 *
 * Currently only emits a debug message showing @how; no socket state is
 * changed.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}
1083
1084EXPORT_SYMBOL_GPL(dccp_shutdown);
1085
1086static inline int __init dccp_mib_init(void)
1087{
1088        dccp_statistics = alloc_percpu(struct dccp_mib);
1089        if (!dccp_statistics)
1090                return -ENOMEM;
1091        return 0;
1092}
1093
1094static inline void dccp_mib_exit(void)
1095{
1096        free_percpu(dccp_statistics);
1097}
1098
/*
 * Optional sizing hint for the established-connections hash table, exposed
 * as a module parameter with mode 0444. When non-zero, dccp_init() derives
 * the table size from it instead of from the amount of system memory.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug-message switch, exposed as a module parameter with mode 0644 and
 * exported to other GPL modules. Only present in debug builds.
 */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1110
1111static int __init dccp_init(void)
1112{
1113        unsigned long goal;
1114        int ehash_order, bhash_order, i;
1115        int rc;
1116
1117        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1118                     FIELD_SIZEOF(struct sk_buff, cb));
1119        rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
1120        if (rc)
1121                goto out_fail;
1122        rc = -ENOBUFS;
1123        inet_hashinfo_init(&dccp_hashinfo);
1124        dccp_hashinfo.bind_bucket_cachep =
1125                kmem_cache_create("dccp_bind_bucket",
1126                                  sizeof(struct inet_bind_bucket), 0,
1127                                  SLAB_HWCACHE_ALIGN, NULL);
1128        if (!dccp_hashinfo.bind_bucket_cachep)
1129                goto out_free_percpu;
1130
1131        /*
1132         * Size and allocate the main established and bind bucket
1133         * hash tables.
1134         *
1135         * The methodology is similar to that of the buffer cache.
1136         */
1137        if (totalram_pages >= (128 * 1024))
1138                goal = totalram_pages >> (21 - PAGE_SHIFT);
1139        else
1140                goal = totalram_pages >> (23 - PAGE_SHIFT);
1141
1142        if (thash_entries)
1143                goal = (thash_entries *
1144                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1145        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1146                ;
1147        do {
1148                unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1149                                        sizeof(struct inet_ehash_bucket);
1150
1151                while (hash_size & (hash_size - 1))
1152                        hash_size--;
1153                dccp_hashinfo.ehash_mask = hash_size - 1;
1154                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1155                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1156        } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1157
1158        if (!dccp_hashinfo.ehash) {
1159                DCCP_CRIT("Failed to allocate DCCP established hash table");
1160                goto out_free_bind_bucket_cachep;
1161        }
1162
1163        for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1164                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1165
1166        if (inet_ehash_locks_alloc(&dccp_hashinfo))
1167                        goto out_free_dccp_ehash;
1168
1169        bhash_order = ehash_order;
1170
1171        do {
1172                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1173                                        sizeof(struct inet_bind_hashbucket);
1174                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1175                    bhash_order > 0)
1176                        continue;
1177                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1178                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1179        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1180
1181        if (!dccp_hashinfo.bhash) {
1182                DCCP_CRIT("Failed to allocate DCCP bind hash table");
1183                goto out_free_dccp_locks;
1184        }
1185
1186        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1187                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1188                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1189        }
1190
1191        rc = dccp_mib_init();
1192        if (rc)
1193                goto out_free_dccp_bhash;
1194
1195        rc = dccp_ackvec_init();
1196        if (rc)
1197                goto out_free_dccp_mib;
1198
1199        rc = dccp_sysctl_init();
1200        if (rc)
1201                goto out_ackvec_exit;
1202
1203        rc = ccid_initialize_builtins();
1204        if (rc)
1205                goto out_sysctl_exit;
1206
1207        dccp_timestamping_init();
1208
1209        return 0;
1210
1211out_sysctl_exit:
1212        dccp_sysctl_exit();
1213out_ackvec_exit:
1214        dccp_ackvec_exit();
1215out_free_dccp_mib:
1216        dccp_mib_exit();
1217out_free_dccp_bhash:
1218        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1219out_free_dccp_locks:
1220        inet_ehash_locks_free(&dccp_hashinfo);
1221out_free_dccp_ehash:
1222        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1223out_free_bind_bucket_cachep:
1224        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1225out_free_percpu:
1226        percpu_counter_destroy(&dccp_orphan_count);
1227out_fail:
1228        dccp_hashinfo.bhash = NULL;
1229        dccp_hashinfo.ehash = NULL;
1230        dccp_hashinfo.bind_bucket_cachep = NULL;
1231        return rc;
1232}
1233
1234static void __exit dccp_fini(void)
1235{
1236        ccid_cleanup_builtins();
1237        dccp_mib_exit();
1238        free_pages((unsigned long)dccp_hashinfo.bhash,
1239                   get_order(dccp_hashinfo.bhash_size *
1240                             sizeof(struct inet_bind_hashbucket)));
1241        free_pages((unsigned long)dccp_hashinfo.ehash,
1242                   get_order((dccp_hashinfo.ehash_mask + 1) *
1243                             sizeof(struct inet_ehash_bucket)));
1244        inet_ehash_locks_free(&dccp_hashinfo);
1245        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1246        dccp_ackvec_exit();
1247        dccp_sysctl_exit();
1248        percpu_counter_destroy(&dccp_orphan_count);
1249}
1250
/* Hook dccp_init()/dccp_fini() up as the module's init and exit handlers. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1257