linux/drivers/infiniband/hw/cxgb4/cm.c
   1/*
   2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/list.h>
  34#include <linux/workqueue.h>
  35#include <linux/skbuff.h>
  36#include <linux/timer.h>
  37#include <linux/notifier.h>
  38#include <linux/inetdevice.h>
  39#include <linux/ip.h>
  40#include <linux/tcp.h>
  41#include <linux/if_vlan.h>
  42
  43#include <net/neighbour.h>
  44#include <net/netevent.h>
  45#include <net/route.h>
  46#include <net/tcp.h>
  47#include <net/ip6_route.h>
  48#include <net/addrconf.h>
  49
  50#include <rdma/ib_addr.h>
  51
  52#include <libcxgb_cm.h>
  53#include "iw_cxgb4.h"
  54#include "clip_tbl.h"
  55
  56static char *states[] = {
  57        "idle",
  58        "listen",
  59        "connecting",
  60        "mpa_wait_req",
  61        "mpa_req_sent",
  62        "mpa_req_rcvd",
  63        "mpa_rep_sent",
  64        "fpdu_mode",
  65        "aborting",
  66        "closing",
  67        "moribund",
  68        "dead",
  69        NULL,
  70};
  71
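     /*
      * The module parameters below tune the offloaded TCP and MPA
      * behaviour of this driver.  For example, they can be set at
      * module load time:
      *
      *	modprobe iw_cxgb4 mpa_rev=1 peer2peer=0 rcv_win=131072
      *
      * or, since they are all 0644, changed later through
      * /sys/module/iw_cxgb4/parameters/<name>.
      */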
  72static int nocong;
  73module_param(nocong, int, 0644);
   74MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
  75
  76static int enable_ecn;
  77module_param(enable_ecn, int, 0644);
  78MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
  79
  80static int dack_mode = 1;
  81module_param(dack_mode, int, 0644);
  82MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
  83
  84uint c4iw_max_read_depth = 32;
  85module_param(c4iw_max_read_depth, int, 0644);
  86MODULE_PARM_DESC(c4iw_max_read_depth,
  87                 "Per-connection max ORD/IRD (default=32)");
  88
  89static int enable_tcp_timestamps;
  90module_param(enable_tcp_timestamps, int, 0644);
  91MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
  92
  93static int enable_tcp_sack;
  94module_param(enable_tcp_sack, int, 0644);
  95MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
  96
  97static int enable_tcp_window_scaling = 1;
  98module_param(enable_tcp_window_scaling, int, 0644);
  99MODULE_PARM_DESC(enable_tcp_window_scaling,
 100                 "Enable tcp window scaling (default=1)");
 101
 102static int peer2peer = 1;
 103module_param(peer2peer, int, 0644);
 104MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
 105
 106static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 107module_param(p2p_type, int, 0644);
 108MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
 109                           "1=RDMA_READ 0=RDMA_WRITE (default 1)");
 110
 111static int ep_timeout_secs = 60;
 112module_param(ep_timeout_secs, int, 0644);
 113MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
 114                                   "in seconds (default=60)");
 115
 116static int mpa_rev = 2;
 117module_param(mpa_rev, int, 0644);
 118MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
 119                "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
 120                " compliant (default=2)");
 121
 122static int markers_enabled;
 123module_param(markers_enabled, int, 0644);
 124MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
 125
 126static int crc_enabled = 1;
 127module_param(crc_enabled, int, 0644);
 128MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
 129
 130static int rcv_win = 256 * 1024;
 131module_param(rcv_win, int, 0644);
 132MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
 133
 134static int snd_win = 128 * 1024;
 135module_param(snd_win, int, 0644);
 136MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
 137
 138static struct workqueue_struct *workq;
 139
 140static struct sk_buff_head rxq;
 141
 142static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
 143static void ep_timeout(struct timer_list *t);
 144static void connect_reply_upcall(struct c4iw_ep *ep, int status);
 145static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
 146
 147static LIST_HEAD(timeout_list);
 148static spinlock_t timeout_lock;
 149
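     /*
      * Take/drop a reference on the iw_cm id bound to an endpoint.
      * Each transition is also recorded in epc->history for debugging.
      */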
 150static void deref_cm_id(struct c4iw_ep_common *epc)
 151{
 152        epc->cm_id->rem_ref(epc->cm_id);
 153        epc->cm_id = NULL;
 154        set_bit(CM_ID_DEREFED, &epc->history);
 155}
 156
 157static void ref_cm_id(struct c4iw_ep_common *epc)
 158{
 159        set_bit(CM_ID_REFED, &epc->history);
 160        epc->cm_id->add_ref(epc->cm_id);
 161}
 162
 163static void deref_qp(struct c4iw_ep *ep)
 164{
 165        c4iw_qp_rem_ref(&ep->com.qp->ibqp);
 166        clear_bit(QP_REFERENCED, &ep->com.flags);
 167        set_bit(QP_DEREFED, &ep->com.history);
 168}
 169
 170static void ref_qp(struct c4iw_ep *ep)
 171{
 172        set_bit(QP_REFERENCED, &ep->com.flags);
 173        set_bit(QP_REFED, &ep->com.history);
 174        c4iw_qp_add_ref(&ep->com.qp->ibqp);
 175}
 176
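     /*
      * start_ep_timer() takes a reference on the endpoint for the lifetime
      * of the timer; stop_ep_timer() drops it again unless the TIMEOUT flag
      * was already set, in which case it returns 1 and leaves the reference
      * alone.
      */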
 177static void start_ep_timer(struct c4iw_ep *ep)
 178{
 179        pr_debug("ep %p\n", ep);
 180        if (timer_pending(&ep->timer)) {
 181                pr_err("%s timer already started! ep %p\n",
 182                       __func__, ep);
 183                return;
 184        }
 185        clear_bit(TIMEOUT, &ep->com.flags);
 186        c4iw_get_ep(&ep->com);
 187        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
 188        add_timer(&ep->timer);
 189}
 190
 191static int stop_ep_timer(struct c4iw_ep *ep)
 192{
 193        pr_debug("ep %p stopping\n", ep);
 194        del_timer_sync(&ep->timer);
 195        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
 196                c4iw_put_ep(&ep->com);
 197                return 0;
 198        }
 199        return 1;
 200}
 201
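     /*
      * Thin wrappers around the LLD transmit entry points: drop the skb and
      * return -EIO if the device is in a fatal error state, otherwise send
      * via the L2T entry (c4iw_l2t_send) or the offload queue
      * (c4iw_ofld_send).
      */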
 202static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
 203                  struct l2t_entry *l2e)
 204{
 205        int     error = 0;
 206
 207        if (c4iw_fatal_error(rdev)) {
 208                kfree_skb(skb);
 209                pr_err("%s - device in error state - dropping\n", __func__);
 210                return -EIO;
 211        }
 212        error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
 213        if (error < 0)
 214                kfree_skb(skb);
 215        else if (error == NET_XMIT_DROP)
 216                return -ENOMEM;
 217        return error < 0 ? error : 0;
 218}
 219
 220int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
 221{
 222        int     error = 0;
 223
 224        if (c4iw_fatal_error(rdev)) {
 225                kfree_skb(skb);
 226                pr_err("%s - device in error state - dropping\n", __func__);
 227                return -EIO;
 228        }
 229        error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
 230        if (error < 0)
 231                kfree_skb(skb);
 232        return error < 0 ? error : 0;
 233}
 234
 235static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 236{
 237        u32 len = roundup(sizeof(struct cpl_tid_release), 16);
 238
 239        skb = get_skb(skb, len, GFP_KERNEL);
 240        if (!skb)
 241                return;
 242
 243        cxgb_mk_tid_release(skb, len, hwtid, 0);
 244        c4iw_ofld_send(rdev, skb);
 245        return;
 246}
 247
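     /*
      * Derive the effective MSS for this connection from the negotiated MTU
      * index: subtract the IPv4/IPv6 and TCP header sizes, and the TCP
      * timestamp option if it was negotiated, clamping to a minimum of 128.
      */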
 248static void set_emss(struct c4iw_ep *ep, u16 opt)
 249{
 250        ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
 251                   ((AF_INET == ep->com.remote_addr.ss_family) ?
 252                    sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
 253                   sizeof(struct tcphdr);
 254        ep->mss = ep->emss;
 255        if (TCPOPT_TSTAMP_G(opt))
 256                ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
 257        if (ep->emss < 128)
 258                ep->emss = 128;
 259        if (ep->emss & 7)
 260                pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
 261                         TCPOPT_MSS_G(opt), ep->mss, ep->emss);
 262        pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
 263                 ep->emss);
 264}
 265
 266static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
 267{
 268        enum c4iw_ep_state state;
 269
 270        mutex_lock(&epc->mutex);
 271        state = epc->state;
 272        mutex_unlock(&epc->mutex);
 273        return state;
 274}
 275
 276static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 277{
 278        epc->state = new;
 279}
 280
 281static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 282{
 283        mutex_lock(&epc->mutex);
 284        pr_debug("%s -> %s\n", states[epc->state], states[new]);
 285        __state_set(epc, new);
 286        mutex_unlock(&epc->mutex);
 287        return;
 288}
 289
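     /*
      * Pre-allocate a small list of skbs on the endpoint so that later
      * control messages (flowc, close, abort) do not have to allocate
      * memory on their send paths.
      */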
 290static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
 291{
 292        struct sk_buff *skb;
 293        unsigned int i;
 294        size_t len;
 295
 296        len = roundup(sizeof(union cpl_wr_size), 16);
 297        for (i = 0; i < size; i++) {
 298                skb = alloc_skb(len, GFP_KERNEL);
 299                if (!skb)
 300                        goto fail;
 301                skb_queue_tail(ep_skb_list, skb);
 302        }
 303        return 0;
 304fail:
 305        skb_queue_purge(ep_skb_list);
 306        return -ENOMEM;
 307}
 308
 309static void *alloc_ep(int size, gfp_t gfp)
 310{
 311        struct c4iw_ep_common *epc;
 312
 313        epc = kzalloc(size, gfp);
 314        if (epc) {
 315                epc->wr_waitp = c4iw_alloc_wr_wait(gfp);
 316                if (!epc->wr_waitp) {
 317                        kfree(epc);
 318                        epc = NULL;
 319                        goto out;
 320                }
 321                kref_init(&epc->kref);
 322                mutex_init(&epc->mutex);
 323                c4iw_init_wr_wait(epc->wr_waitp);
 324        }
 325        pr_debug("alloc ep %p\n", epc);
 326out:
 327        return epc;
 328}
 329
 330static void remove_ep_tid(struct c4iw_ep *ep)
 331{
 332        unsigned long flags;
 333
 334        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 335        __xa_erase(&ep->com.dev->hwtids, ep->hwtid);
 336        if (xa_empty(&ep->com.dev->hwtids))
 337                wake_up(&ep->com.dev->wait);
 338        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 339}
 340
 341static int insert_ep_tid(struct c4iw_ep *ep)
 342{
 343        unsigned long flags;
 344        int err;
 345
 346        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 347        err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
 348        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 349
 350        return err;
 351}
 352
 353/*
 354 * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
 355 */
 356static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
 357{
 358        struct c4iw_ep *ep;
 359        unsigned long flags;
 360
 361        xa_lock_irqsave(&dev->hwtids, flags);
 362        ep = xa_load(&dev->hwtids, tid);
 363        if (ep)
 364                c4iw_get_ep(&ep->com);
 365        xa_unlock_irqrestore(&dev->hwtids, flags);
 366        return ep;
 367}
 368
 369/*
 370 * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
 371 */
 372static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
 373                                               unsigned int stid)
 374{
 375        struct c4iw_listen_ep *ep;
 376        unsigned long flags;
 377
 378        xa_lock_irqsave(&dev->stids, flags);
 379        ep = xa_load(&dev->stids, stid);
 380        if (ep)
 381                c4iw_get_ep(&ep->com);
 382        xa_unlock_irqrestore(&dev->stids, flags);
 383        return ep;
 384}
 385
 386void _c4iw_free_ep(struct kref *kref)
 387{
 388        struct c4iw_ep *ep;
 389
 390        ep = container_of(kref, struct c4iw_ep, com.kref);
 391        pr_debug("ep %p state %s\n", ep, states[ep->com.state]);
 392        if (test_bit(QP_REFERENCED, &ep->com.flags))
 393                deref_qp(ep);
 394        if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 395                if (ep->com.remote_addr.ss_family == AF_INET6) {
 396                        struct sockaddr_in6 *sin6 =
 397                                        (struct sockaddr_in6 *)
 398                                        &ep->com.local_addr;
 399
 400                        cxgb4_clip_release(
 401                                        ep->com.dev->rdev.lldi.ports[0],
 402                                        (const u32 *)&sin6->sin6_addr.s6_addr,
 403                                        1);
 404                }
 405                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
 406                                 ep->com.local_addr.ss_family);
 407                dst_release(ep->dst);
 408                cxgb4_l2t_release(ep->l2t);
 409                kfree_skb(ep->mpa_skb);
 410        }
 411        if (!skb_queue_empty(&ep->com.ep_skb_list))
 412                skb_queue_purge(&ep->com.ep_skb_list);
 413        c4iw_put_wr_wait(ep->com.wr_waitp);
 414        kfree(ep);
 415}
 416
 417static void release_ep_resources(struct c4iw_ep *ep)
 418{
 419        set_bit(RELEASE_RESOURCES, &ep->com.flags);
 420
 421        /*
  422         * If we have a hwtid, then remove it from the xarray
 423         * so lookups will no longer find this endpoint.  Otherwise
 424         * we have a race where one thread finds the ep ptr just
 425         * before the other thread is freeing the ep memory.
 426         */
 427        if (ep->hwtid != -1)
 428                remove_ep_tid(ep);
 429        c4iw_put_ep(&ep->com);
 430}
 431
 432static int status2errno(int status)
 433{
 434        switch (status) {
 435        case CPL_ERR_NONE:
 436                return 0;
 437        case CPL_ERR_CONN_RESET:
 438                return -ECONNRESET;
 439        case CPL_ERR_ARP_MISS:
 440                return -EHOSTUNREACH;
 441        case CPL_ERR_CONN_TIMEDOUT:
 442                return -ETIMEDOUT;
 443        case CPL_ERR_TCAM_FULL:
 444                return -ENOMEM;
 445        case CPL_ERR_CONN_EXIST:
 446                return -EADDRINUSE;
 447        default:
 448                return -EIO;
 449        }
 450}
 451
 452/*
 453 * Try and reuse skbs already allocated...
 454 */
 455static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 456{
 457        if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
 458                skb_trim(skb, 0);
 459                skb_get(skb);
 460                skb_reset_transport_header(skb);
 461        } else {
 462                skb = alloc_skb(len, gfp);
 463                if (!skb)
 464                        return NULL;
 465        }
 466        t4_set_arp_err_handler(skb, NULL, NULL);
 467        return skb;
 468}
 469
 470static struct net_device *get_real_dev(struct net_device *egress_dev)
 471{
 472        return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 473}
 474
 475static void arp_failure_discard(void *handle, struct sk_buff *skb)
 476{
 477        pr_err("ARP failure\n");
 478        kfree_skb(skb);
 479}
 480
 481static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
 482{
 483        pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
 484}
 485
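     /*
      * Two fake CPL opcodes, beyond the real CPL command range, used to
      * defer endpoint release from ARP-failure handlers to process_work()
      * context (see queue_arp_failure_cpl() below).
      */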
 486enum {
 487        NUM_FAKE_CPLS = 2,
 488        FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
 489        FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
 490};
 491
 492static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 493{
 494        struct c4iw_ep *ep;
 495
 496        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 497        release_ep_resources(ep);
 498        return 0;
 499}
 500
 501static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 502{
 503        struct c4iw_ep *ep;
 504
 505        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 506        c4iw_put_ep(&ep->parent_ep->com);
 507        release_ep_resources(ep);
 508        return 0;
 509}
 510
 511/*
 512 * Fake up a special CPL opcode and call sched() so process_work() will call
 513 * _put_ep_safe() in a safe context to free the ep resources.  This is needed
 514 * because ARP error handlers are called in an ATOMIC context, and
 515 * _c4iw_free_ep() needs to block.
 516 */
 517static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
 518                                  int cpl)
 519{
 520        struct cpl_act_establish *rpl = cplhdr(skb);
 521
 522        /* Set our special ARP_FAILURE opcode */
 523        rpl->ot.opcode = cpl;
 524
 525        /*
 526         * Save ep in the skb->cb area, after where sched() will save the dev
 527         * ptr.
 528         */
 529        *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
 530        sched(ep->com.dev, skb);
 531}
 532
 533/* Handle an ARP failure for an accept */
 534static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
 535{
 536        struct c4iw_ep *ep = handle;
 537
 538        pr_err("ARP failure during accept - tid %u - dropping connection\n",
 539               ep->hwtid);
 540
 541        __state_set(&ep->com, DEAD);
 542        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
 543}
 544
 545/*
 546 * Handle an ARP failure for an active open.
 547 */
 548static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
 549{
 550        struct c4iw_ep *ep = handle;
 551
 552        pr_err("ARP failure during connect\n");
 553        connect_reply_upcall(ep, -EHOSTUNREACH);
 554        __state_set(&ep->com, DEAD);
 555        if (ep->com.remote_addr.ss_family == AF_INET6) {
 556                struct sockaddr_in6 *sin6 =
 557                        (struct sockaddr_in6 *)&ep->com.local_addr;
 558                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 559                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 560        }
 561        xa_erase_irq(&ep->com.dev->atids, ep->atid);
 562        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
 563        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 564}
 565
 566/*
 567 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 568 * and send it along.
 569 */
 570static void abort_arp_failure(void *handle, struct sk_buff *skb)
 571{
 572        int ret;
 573        struct c4iw_ep *ep = handle;
 574        struct c4iw_rdev *rdev = &ep->com.dev->rdev;
 575        struct cpl_abort_req *req = cplhdr(skb);
 576
 577        pr_debug("rdev %p\n", rdev);
 578        req->cmd = CPL_ABORT_NO_RST;
 579        skb_get(skb);
 580        ret = c4iw_ofld_send(rdev, skb);
 581        if (ret) {
 582                __state_set(&ep->com, DEAD);
 583                queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 584        } else
 585                kfree_skb(skb);
 586}
 587
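     /*
      * Send the FW_FLOWC_WR that primes the firmware with this connection's
      * parameters (PF/VF, channels, queue ids, sequence numbers, send
      * window, MSS and receive window scale); a tenth entry carries the
      * VLAN priority as a scheduling class when the L2T entry has a VLAN
      * configured.
      */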
 588static int send_flowc(struct c4iw_ep *ep)
 589{
 590        struct fw_flowc_wr *flowc;
 591        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 592        u16 vlan = ep->l2t->vlan;
 593        int nparams;
 594        int flowclen, flowclen16;
 595
 596        if (WARN_ON(!skb))
 597                return -ENOMEM;
 598
 599        if (vlan == CPL_L2T_VLAN_NONE)
 600                nparams = 9;
 601        else
 602                nparams = 10;
 603
 604        flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
 605        flowclen16 = DIV_ROUND_UP(flowclen, 16);
 606        flowclen = flowclen16 * 16;
 607
 608        flowc = __skb_put(skb, flowclen);
 609        memset(flowc, 0, flowclen);
 610
 611        flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
 612                                           FW_FLOWC_WR_NPARAMS_V(nparams));
 613        flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
 614                                          FW_WR_FLOWID_V(ep->hwtid));
 615
 616        flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 617        flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
 618                                            (ep->com.dev->rdev.lldi.pf));
 619        flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 620        flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
 621        flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 622        flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
 623        flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 624        flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
 625        flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
 626        flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
 627        flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
 628        flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
 629        flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
 630        flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
 631        flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 632        flowc->mnemval[7].val = cpu_to_be32(ep->emss);
 633        flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
 634        flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
 635        if (nparams == 10) {
 636                u16 pri;
 637                pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 638                flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
 639                flowc->mnemval[9].val = cpu_to_be32(pri);
 640        }
 641
 642        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 643        return c4iw_ofld_send(&ep->com.dev->rdev, skb);
 644}
 645
 646static int send_halfclose(struct c4iw_ep *ep)
 647{
 648        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 649        u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
 650
 651        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 652        if (WARN_ON(!skb))
 653                return -ENOMEM;
 654
 655        cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
 656                              NULL, arp_failure_discard);
 657
 658        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 659}
 660
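     /*
      * Issue a CPL_GET_TCB to read back this connection's TCB from
      * hardware; used by send_abort() when the QP is backed by an SRQ.
      */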
 661static void read_tcb(struct c4iw_ep *ep)
 662{
 663        struct sk_buff *skb;
 664        struct cpl_get_tcb *req;
 665        int wrlen = roundup(sizeof(*req), 16);
 666
  667        skb = get_skb(NULL, wrlen, GFP_KERNEL);
 668        if (WARN_ON(!skb))
 669                return;
 670
 671        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
 672        req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
 673        memset(req, 0, wrlen);
 674        INIT_TP_WR(req, ep->hwtid);
 675        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
 676        req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
 677
 678        /*
 679         * keep a ref on the ep so the tcb is not unlocked before this
 680         * cpl completes. The ref is released in read_tcb_rpl().
 681         */
 682        c4iw_get_ep(&ep->com);
 683        if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
 684                c4iw_put_ep(&ep->com);
 685}
 686
 687static int send_abort_req(struct c4iw_ep *ep)
 688{
 689        u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 690        struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
 691
 692        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 693        if (WARN_ON(!req_skb))
 694                return -ENOMEM;
 695
 696        cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
 697                          ep, abort_arp_failure);
 698
 699        return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 700}
 701
 702static int send_abort(struct c4iw_ep *ep)
 703{
 704        if (!ep->com.qp || !ep->com.qp->srq) {
 705                send_abort_req(ep);
 706                return 0;
 707        }
 708        set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
 709        read_tcb(ep);
 710        return 0;
 711}
 712
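     /*
      * Build and send the active-open request for this endpoint, choosing
      * the T4/T5/T6 and IPv4/IPv6 variant of the CPL and encoding the TCP
      * options in opt0/opt2.  On failure the IPv6 CLIP reference taken
      * here is released again.
      */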
 713static int send_connect(struct c4iw_ep *ep)
 714{
 715        struct cpl_act_open_req *req = NULL;
 716        struct cpl_t5_act_open_req *t5req = NULL;
 717        struct cpl_t6_act_open_req *t6req = NULL;
 718        struct cpl_act_open_req6 *req6 = NULL;
 719        struct cpl_t5_act_open_req6 *t5req6 = NULL;
 720        struct cpl_t6_act_open_req6 *t6req6 = NULL;
 721        struct sk_buff *skb;
 722        u64 opt0;
 723        u32 opt2;
 724        unsigned int mtu_idx;
 725        u32 wscale;
 726        int win, sizev4, sizev6, wrlen;
 727        struct sockaddr_in *la = (struct sockaddr_in *)
 728                                 &ep->com.local_addr;
 729        struct sockaddr_in *ra = (struct sockaddr_in *)
 730                                 &ep->com.remote_addr;
 731        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
 732                                   &ep->com.local_addr;
 733        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
 734                                   &ep->com.remote_addr;
 735        int ret;
 736        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 737        u32 isn = (prandom_u32() & ~7UL) - 1;
 738        struct net_device *netdev;
 739        u64 params;
 740
 741        netdev = ep->com.dev->rdev.lldi.ports[0];
 742
 743        switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 744        case CHELSIO_T4:
 745                sizev4 = sizeof(struct cpl_act_open_req);
 746                sizev6 = sizeof(struct cpl_act_open_req6);
 747                break;
 748        case CHELSIO_T5:
 749                sizev4 = sizeof(struct cpl_t5_act_open_req);
 750                sizev6 = sizeof(struct cpl_t5_act_open_req6);
 751                break;
 752        case CHELSIO_T6:
 753                sizev4 = sizeof(struct cpl_t6_act_open_req);
 754                sizev6 = sizeof(struct cpl_t6_act_open_req6);
 755                break;
 756        default:
 757                pr_err("T%d Chip is not supported\n",
 758                       CHELSIO_CHIP_VERSION(adapter_type));
 759                return -EINVAL;
 760        }
 761
 762        wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
 763                        roundup(sizev4, 16) :
 764                        roundup(sizev6, 16);
 765
 766        pr_debug("ep %p atid %u\n", ep, ep->atid);
 767
 768        skb = get_skb(NULL, wrlen, GFP_KERNEL);
 769        if (!skb) {
 770                pr_err("%s - failed to alloc skb\n", __func__);
 771                return -ENOMEM;
 772        }
 773        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 774
 775        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 776                      enable_tcp_timestamps,
 777                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
 778        wscale = cxgb_compute_wscale(rcv_win);
 779
 780        /*
 781         * Specify the largest window that will fit in opt0. The
 782         * remainder will be specified in the rx_data_ack.
 783         */
 784        win = ep->rcv_win >> 10;
 785        if (win > RCV_BUFSIZ_M)
 786                win = RCV_BUFSIZ_M;
 787
 788        opt0 = (nocong ? NO_CONG_F : 0) |
 789               KEEP_ALIVE_F |
 790               DELACK_F |
 791               WND_SCALE_V(wscale) |
 792               MSS_IDX_V(mtu_idx) |
 793               L2T_IDX_V(ep->l2t->idx) |
 794               TX_CHAN_V(ep->tx_chan) |
 795               SMAC_SEL_V(ep->smac_idx) |
 796               DSCP_V(ep->tos >> 2) |
 797               ULP_MODE_V(ULP_MODE_TCPDDP) |
 798               RCV_BUFSIZ_V(win);
 799        opt2 = RX_CHANNEL_V(0) |
 800               CCTRL_ECN_V(enable_ecn) |
 801               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
 802        if (enable_tcp_timestamps)
 803                opt2 |= TSTAMPS_EN_F;
 804        if (enable_tcp_sack)
 805                opt2 |= SACK_EN_F;
 806        if (wscale && enable_tcp_window_scaling)
 807                opt2 |= WND_SCALE_EN_F;
 808        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
 809                if (peer2peer)
 810                        isn += 4;
 811
 812                opt2 |= T5_OPT_2_VALID_F;
 813                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
 814                opt2 |= T5_ISS_F;
 815        }
 816
 817        params = cxgb4_select_ntuple(netdev, ep->l2t);
 818
 819        if (ep->com.remote_addr.ss_family == AF_INET6)
 820                cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
 821                               (const u32 *)&la6->sin6_addr.s6_addr, 1);
 822
 823        t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
 824
 825        if (ep->com.remote_addr.ss_family == AF_INET) {
 826                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 827                case CHELSIO_T4:
 828                        req = skb_put(skb, wrlen);
 829                        INIT_TP_WR(req, 0);
 830                        break;
 831                case CHELSIO_T5:
 832                        t5req = skb_put(skb, wrlen);
 833                        INIT_TP_WR(t5req, 0);
 834                        req = (struct cpl_act_open_req *)t5req;
 835                        break;
 836                case CHELSIO_T6:
 837                        t6req = skb_put(skb, wrlen);
 838                        INIT_TP_WR(t6req, 0);
 839                        req = (struct cpl_act_open_req *)t6req;
 840                        t5req = (struct cpl_t5_act_open_req *)t6req;
 841                        break;
 842                default:
 843                        pr_err("T%d Chip is not supported\n",
 844                               CHELSIO_CHIP_VERSION(adapter_type));
 845                        ret = -EINVAL;
 846                        goto clip_release;
 847                }
 848
 849                OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
 850                                        ((ep->rss_qid<<14) | ep->atid)));
 851                req->local_port = la->sin_port;
 852                req->peer_port = ra->sin_port;
 853                req->local_ip = la->sin_addr.s_addr;
 854                req->peer_ip = ra->sin_addr.s_addr;
 855                req->opt0 = cpu_to_be64(opt0);
 856
 857                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 858                        req->params = cpu_to_be32(params);
 859                        req->opt2 = cpu_to_be32(opt2);
 860                } else {
 861                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 862                                t5req->params =
 863                                          cpu_to_be64(FILTER_TUPLE_V(params));
 864                                t5req->rsvd = cpu_to_be32(isn);
 865                                pr_debug("snd_isn %u\n", t5req->rsvd);
 866                                t5req->opt2 = cpu_to_be32(opt2);
 867                        } else {
 868                                t6req->params =
 869                                          cpu_to_be64(FILTER_TUPLE_V(params));
 870                                t6req->rsvd = cpu_to_be32(isn);
 871                                pr_debug("snd_isn %u\n", t6req->rsvd);
 872                                t6req->opt2 = cpu_to_be32(opt2);
 873                        }
 874                }
 875        } else {
 876                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 877                case CHELSIO_T4:
 878                        req6 = skb_put(skb, wrlen);
 879                        INIT_TP_WR(req6, 0);
 880                        break;
 881                case CHELSIO_T5:
 882                        t5req6 = skb_put(skb, wrlen);
 883                        INIT_TP_WR(t5req6, 0);
 884                        req6 = (struct cpl_act_open_req6 *)t5req6;
 885                        break;
 886                case CHELSIO_T6:
 887                        t6req6 = skb_put(skb, wrlen);
 888                        INIT_TP_WR(t6req6, 0);
 889                        req6 = (struct cpl_act_open_req6 *)t6req6;
 890                        t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
 891                        break;
 892                default:
 893                        pr_err("T%d Chip is not supported\n",
 894                               CHELSIO_CHIP_VERSION(adapter_type));
 895                        ret = -EINVAL;
 896                        goto clip_release;
 897                }
 898
 899                OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
 900                                        ((ep->rss_qid<<14)|ep->atid)));
 901                req6->local_port = la6->sin6_port;
 902                req6->peer_port = ra6->sin6_port;
 903                req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
 904                req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
 905                req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
 906                req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
 907                req6->opt0 = cpu_to_be64(opt0);
 908
 909                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 910                        req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
 911                                                                      ep->l2t));
 912                        req6->opt2 = cpu_to_be32(opt2);
 913                } else {
 914                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 915                                t5req6->params =
 916                                            cpu_to_be64(FILTER_TUPLE_V(params));
 917                                t5req6->rsvd = cpu_to_be32(isn);
 918                                pr_debug("snd_isn %u\n", t5req6->rsvd);
 919                                t5req6->opt2 = cpu_to_be32(opt2);
 920                        } else {
 921                                t6req6->params =
 922                                            cpu_to_be64(FILTER_TUPLE_V(params));
 923                                t6req6->rsvd = cpu_to_be32(isn);
 924                                pr_debug("snd_isn %u\n", t6req6->rsvd);
 925                                t6req6->opt2 = cpu_to_be32(opt2);
 926                        }
 927
 928                }
 929        }
 930
 931        set_bit(ACT_OPEN_REQ, &ep->com.history);
 932        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 933clip_release:
 934        if (ret && ep->com.remote_addr.ss_family == AF_INET6)
 935                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 936                                   (const u32 *)&la6->sin6_addr.s6_addr, 1);
 937        return ret;
 938}
 939
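     /*
      * Send the MPA request as streaming-mode payload inside a
      * FW_OFLD_TX_DATA_WR: the MPA header, the MPA v2 connection
      * parameters when revision 2 is used, and any private data supplied
      * by the ULP.
      */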
 940static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
 941                        u8 mpa_rev_to_use)
 942{
 943        int mpalen, wrlen, ret;
 944        struct fw_ofld_tx_data_wr *req;
 945        struct mpa_message *mpa;
 946        struct mpa_v2_conn_params mpa_v2_params;
 947
 948        pr_debug("ep %p tid %u pd_len %d\n",
 949                 ep, ep->hwtid, ep->plen);
 950
 951        mpalen = sizeof(*mpa) + ep->plen;
 952        if (mpa_rev_to_use == 2)
 953                mpalen += sizeof(struct mpa_v2_conn_params);
 954        wrlen = roundup(mpalen + sizeof(*req), 16);
 955        skb = get_skb(skb, wrlen, GFP_KERNEL);
 956        if (!skb) {
 957                connect_reply_upcall(ep, -ENOMEM);
 958                return -ENOMEM;
 959        }
 960        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 961
 962        req = skb_put_zero(skb, wrlen);
 963        req->op_to_immdlen = cpu_to_be32(
 964                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
 965                FW_WR_COMPL_F |
 966                FW_WR_IMMDLEN_V(mpalen));
 967        req->flowid_len16 = cpu_to_be32(
 968                FW_WR_FLOWID_V(ep->hwtid) |
 969                FW_WR_LEN16_V(wrlen >> 4));
 970        req->plen = cpu_to_be32(mpalen);
 971        req->tunnel_to_proxy = cpu_to_be32(
 972                FW_OFLD_TX_DATA_WR_FLUSH_F |
 973                FW_OFLD_TX_DATA_WR_SHOVE_F);
 974
 975        mpa = (struct mpa_message *)(req + 1);
 976        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 977
 978        mpa->flags = 0;
 979        if (crc_enabled)
 980                mpa->flags |= MPA_CRC;
 981        if (markers_enabled) {
 982                mpa->flags |= MPA_MARKERS;
 983                ep->mpa_attr.recv_marker_enabled = 1;
 984        } else {
 985                ep->mpa_attr.recv_marker_enabled = 0;
 986        }
 987        if (mpa_rev_to_use == 2)
 988                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
 989
 990        mpa->private_data_size = htons(ep->plen);
 991        mpa->revision = mpa_rev_to_use;
 992        if (mpa_rev_to_use == 1) {
 993                ep->tried_with_mpa_v1 = 1;
 994                ep->retry_with_mpa_v1 = 0;
 995        }
 996
 997        if (mpa_rev_to_use == 2) {
 998                mpa->private_data_size =
 999                        htons(ntohs(mpa->private_data_size) +
1000                              sizeof(struct mpa_v2_conn_params));
1001                pr_debug("initiator ird %u ord %u\n", ep->ird,
1002                         ep->ord);
1003                mpa_v2_params.ird = htons((u16)ep->ird);
1004                mpa_v2_params.ord = htons((u16)ep->ord);
1005
1006                if (peer2peer) {
1007                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1008                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1009                                mpa_v2_params.ord |=
1010                                        htons(MPA_V2_RDMA_WRITE_RTR);
1011                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1012                                mpa_v2_params.ord |=
1013                                        htons(MPA_V2_RDMA_READ_RTR);
1014                }
1015                memcpy(mpa->private_data, &mpa_v2_params,
1016                       sizeof(struct mpa_v2_conn_params));
1017
1018                if (ep->plen)
1019                        memcpy(mpa->private_data +
1020                               sizeof(struct mpa_v2_conn_params),
1021                               ep->mpa_pkt + sizeof(*mpa), ep->plen);
1022        } else
1023                if (ep->plen)
1024                        memcpy(mpa->private_data,
1025                                        ep->mpa_pkt + sizeof(*mpa), ep->plen);
1026
1027        /*
1028         * Reference the mpa skb.  This ensures the data area
1029         * will remain in memory until the hw acks the tx.
1030         * Function fw4_ack() will deref it.
1031         */
1032        skb_get(skb);
1033        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1034        ep->mpa_skb = skb;
1035        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1036        if (ret)
1037                return ret;
1038        start_ep_timer(ep);
1039        __state_set(&ep->com, MPA_REQ_SENT);
1040        ep->mpa_attr.initiator = 1;
1041        ep->snd_seq += mpalen;
1042        return ret;
1043}
1044
1045static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1046{
1047        int mpalen, wrlen;
1048        struct fw_ofld_tx_data_wr *req;
1049        struct mpa_message *mpa;
1050        struct sk_buff *skb;
1051        struct mpa_v2_conn_params mpa_v2_params;
1052
1053        pr_debug("ep %p tid %u pd_len %d\n",
1054                 ep, ep->hwtid, ep->plen);
1055
1056        mpalen = sizeof(*mpa) + plen;
1057        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1058                mpalen += sizeof(struct mpa_v2_conn_params);
1059        wrlen = roundup(mpalen + sizeof(*req), 16);
1060
1061        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1062        if (!skb) {
1063                pr_err("%s - cannot alloc skb!\n", __func__);
1064                return -ENOMEM;
1065        }
1066        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1067
1068        req = skb_put_zero(skb, wrlen);
1069        req->op_to_immdlen = cpu_to_be32(
1070                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1071                FW_WR_COMPL_F |
1072                FW_WR_IMMDLEN_V(mpalen));
1073        req->flowid_len16 = cpu_to_be32(
1074                FW_WR_FLOWID_V(ep->hwtid) |
1075                FW_WR_LEN16_V(wrlen >> 4));
1076        req->plen = cpu_to_be32(mpalen);
1077        req->tunnel_to_proxy = cpu_to_be32(
1078                FW_OFLD_TX_DATA_WR_FLUSH_F |
1079                FW_OFLD_TX_DATA_WR_SHOVE_F);
1080
1081        mpa = (struct mpa_message *)(req + 1);
1082        memset(mpa, 0, sizeof(*mpa));
1083        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1084        mpa->flags = MPA_REJECT;
1085        mpa->revision = ep->mpa_attr.version;
1086        mpa->private_data_size = htons(plen);
1087
1088        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1089                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1090                mpa->private_data_size =
1091                        htons(ntohs(mpa->private_data_size) +
1092                              sizeof(struct mpa_v2_conn_params));
1093                mpa_v2_params.ird = htons(((u16)ep->ird) |
1094                                          (peer2peer ? MPA_V2_PEER2PEER_MODEL :
1095                                           0));
1096                mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1097                                          (p2p_type ==
1098                                           FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1099                                           MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1100                                           FW_RI_INIT_P2PTYPE_READ_REQ ?
1101                                           MPA_V2_RDMA_READ_RTR : 0) : 0));
1102                memcpy(mpa->private_data, &mpa_v2_params,
1103                       sizeof(struct mpa_v2_conn_params));
1104
1105                if (ep->plen)
1106                        memcpy(mpa->private_data +
1107                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1108        } else
1109                if (plen)
1110                        memcpy(mpa->private_data, pdata, plen);
1111
1112        /*
1113         * Reference the mpa skb again.  This ensures the data area
1114         * will remain in memory until the hw acks the tx.
1115         * Function fw4_ack() will deref it.
1116         */
1117        skb_get(skb);
1118        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1119        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1120        ep->mpa_skb = skb;
1121        ep->snd_seq += mpalen;
1122        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1123}
1124
1125static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1126{
1127        int mpalen, wrlen;
1128        struct fw_ofld_tx_data_wr *req;
1129        struct mpa_message *mpa;
1130        struct sk_buff *skb;
1131        struct mpa_v2_conn_params mpa_v2_params;
1132
1133        pr_debug("ep %p tid %u pd_len %d\n",
1134                 ep, ep->hwtid, ep->plen);
1135
1136        mpalen = sizeof(*mpa) + plen;
1137        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1138                mpalen += sizeof(struct mpa_v2_conn_params);
1139        wrlen = roundup(mpalen + sizeof(*req), 16);
1140
1141        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1142        if (!skb) {
1143                pr_err("%s - cannot alloc skb!\n", __func__);
1144                return -ENOMEM;
1145        }
1146        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1147
1148        req = skb_put_zero(skb, wrlen);
1149        req->op_to_immdlen = cpu_to_be32(
1150                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1151                FW_WR_COMPL_F |
1152                FW_WR_IMMDLEN_V(mpalen));
1153        req->flowid_len16 = cpu_to_be32(
1154                FW_WR_FLOWID_V(ep->hwtid) |
1155                FW_WR_LEN16_V(wrlen >> 4));
1156        req->plen = cpu_to_be32(mpalen);
1157        req->tunnel_to_proxy = cpu_to_be32(
1158                FW_OFLD_TX_DATA_WR_FLUSH_F |
1159                FW_OFLD_TX_DATA_WR_SHOVE_F);
1160
1161        mpa = (struct mpa_message *)(req + 1);
1162        memset(mpa, 0, sizeof(*mpa));
1163        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1164        mpa->flags = 0;
1165        if (ep->mpa_attr.crc_enabled)
1166                mpa->flags |= MPA_CRC;
1167        if (ep->mpa_attr.recv_marker_enabled)
1168                mpa->flags |= MPA_MARKERS;
1169        mpa->revision = ep->mpa_attr.version;
1170        mpa->private_data_size = htons(plen);
1171
1172        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1173                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1174                mpa->private_data_size =
1175                        htons(ntohs(mpa->private_data_size) +
1176                              sizeof(struct mpa_v2_conn_params));
1177                mpa_v2_params.ird = htons((u16)ep->ird);
1178                mpa_v2_params.ord = htons((u16)ep->ord);
1179                if (peer2peer && (ep->mpa_attr.p2p_type !=
1180                                        FW_RI_INIT_P2PTYPE_DISABLED)) {
1181                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1182
1183                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1184                                mpa_v2_params.ord |=
1185                                        htons(MPA_V2_RDMA_WRITE_RTR);
1186                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1187                                mpa_v2_params.ord |=
1188                                        htons(MPA_V2_RDMA_READ_RTR);
1189                }
1190
1191                memcpy(mpa->private_data, &mpa_v2_params,
1192                       sizeof(struct mpa_v2_conn_params));
1193
1194                if (ep->plen)
1195                        memcpy(mpa->private_data +
1196                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1197        } else
1198                if (plen)
1199                        memcpy(mpa->private_data, pdata, plen);
1200
1201        /*
1202         * Reference the mpa skb.  This ensures the data area
1203         * will remain in memory until the hw acks the tx.
1204         * Function fw4_ack() will deref it.
1205         */
1206        skb_get(skb);
1207        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1208        ep->mpa_skb = skb;
1209        __state_set(&ep->com, MPA_REP_SENT);
1210        ep->snd_seq += mpalen;
1211        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1212}
1213
1214static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1215{
1216        struct c4iw_ep *ep;
1217        struct cpl_act_establish *req = cplhdr(skb);
1218        unsigned short tcp_opt = ntohs(req->tcp_opt);
1219        unsigned int tid = GET_TID(req);
1220        unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1221        struct tid_info *t = dev->rdev.lldi.tids;
1222        int ret;
1223
1224        ep = lookup_atid(t, atid);
1225
1226        pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
1227                 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1228
1229        mutex_lock(&ep->com.mutex);
1230        dst_confirm(ep->dst);
1231
1232        /* setup the hwtid for this connection */
1233        ep->hwtid = tid;
1234        cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
1235        insert_ep_tid(ep);
1236
1237        ep->snd_seq = be32_to_cpu(req->snd_isn);
1238        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1239        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
1240
1241        set_emss(ep, tcp_opt);
1242
1243        /* dealloc the atid */
1244        xa_erase_irq(&ep->com.dev->atids, atid);
1245        cxgb4_free_atid(t, atid);
1246        set_bit(ACT_ESTAB, &ep->com.history);
1247
1248        /* start MPA negotiation */
1249        ret = send_flowc(ep);
1250        if (ret)
1251                goto err;
1252        if (ep->retry_with_mpa_v1)
1253                ret = send_mpa_req(ep, skb, 1);
1254        else
1255                ret = send_mpa_req(ep, skb, mpa_rev);
1256        if (ret)
1257                goto err;
1258        mutex_unlock(&ep->com.mutex);
1259        return 0;
1260err:
1261        mutex_unlock(&ep->com.mutex);
1262        connect_reply_upcall(ep, -ENOMEM);
1263        c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1264        return 0;
1265}
1266
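     /*
      * The *_upcall() helpers below deliver iw_cm events (close,
      * disconnect, abort, connect reply/request, established) to the
      * connection manager via the cm_id's event handler.
      */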
1267static void close_complete_upcall(struct c4iw_ep *ep, int status)
1268{
1269        struct iw_cm_event event;
1270
1271        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1272        memset(&event, 0, sizeof(event));
1273        event.event = IW_CM_EVENT_CLOSE;
1274        event.status = status;
1275        if (ep->com.cm_id) {
1276                pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
1277                         ep, ep->com.cm_id, ep->hwtid);
1278                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1279                deref_cm_id(&ep->com);
1280                set_bit(CLOSE_UPCALL, &ep->com.history);
1281        }
1282}
1283
1284static void peer_close_upcall(struct c4iw_ep *ep)
1285{
1286        struct iw_cm_event event;
1287
1288        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1289        memset(&event, 0, sizeof(event));
1290        event.event = IW_CM_EVENT_DISCONNECT;
1291        if (ep->com.cm_id) {
1292                pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
1293                         ep, ep->com.cm_id, ep->hwtid);
1294                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1295                set_bit(DISCONN_UPCALL, &ep->com.history);
1296        }
1297}
1298
1299static void peer_abort_upcall(struct c4iw_ep *ep)
1300{
1301        struct iw_cm_event event;
1302
1303        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1304        memset(&event, 0, sizeof(event));
1305        event.event = IW_CM_EVENT_CLOSE;
1306        event.status = -ECONNRESET;
1307        if (ep->com.cm_id) {
1308                pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
1309                         ep->com.cm_id, ep->hwtid);
1310                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1311                deref_cm_id(&ep->com);
1312                set_bit(ABORT_UPCALL, &ep->com.history);
1313        }
1314}
1315
1316static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1317{
1318        struct iw_cm_event event;
1319
1320        pr_debug("ep %p tid %u status %d\n",
1321                 ep, ep->hwtid, status);
1322        memset(&event, 0, sizeof(event));
1323        event.event = IW_CM_EVENT_CONNECT_REPLY;
1324        event.status = status;
1325        memcpy(&event.local_addr, &ep->com.local_addr,
1326               sizeof(ep->com.local_addr));
1327        memcpy(&event.remote_addr, &ep->com.remote_addr,
1328               sizeof(ep->com.remote_addr));
1329
1330        if ((status == 0) || (status == -ECONNREFUSED)) {
1331                if (!ep->tried_with_mpa_v1) {
1332                        /* this means MPA_v2 is used */
1333                        event.ord = ep->ird;
1334                        event.ird = ep->ord;
1335                        event.private_data_len = ep->plen -
1336                                sizeof(struct mpa_v2_conn_params);
1337                        event.private_data = ep->mpa_pkt +
1338                                sizeof(struct mpa_message) +
1339                                sizeof(struct mpa_v2_conn_params);
1340                } else {
1341                        /* this means MPA_v1 is used */
1342                        event.ord = cur_max_read_depth(ep->com.dev);
1343                        event.ird = cur_max_read_depth(ep->com.dev);
1344                        event.private_data_len = ep->plen;
1345                        event.private_data = ep->mpa_pkt +
1346                                sizeof(struct mpa_message);
1347                }
1348        }
1349
1350        pr_debug("ep %p tid %u status %d\n", ep,
1351                 ep->hwtid, status);
1352        set_bit(CONN_RPL_UPCALL, &ep->com.history);
1353        ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1354
1355        if (status < 0)
1356                deref_cm_id(&ep->com);
1357}
1358
1359static int connect_request_upcall(struct c4iw_ep *ep)
1360{
1361        struct iw_cm_event event;
1362        int ret;
1363
1364        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1365        memset(&event, 0, sizeof(event));
1366        event.event = IW_CM_EVENT_CONNECT_REQUEST;
1367        memcpy(&event.local_addr, &ep->com.local_addr,
1368               sizeof(ep->com.local_addr));
1369        memcpy(&event.remote_addr, &ep->com.remote_addr,
1370               sizeof(ep->com.remote_addr));
1371        event.provider_data = ep;
1372        if (!ep->tried_with_mpa_v1) {
1373                /* this means MPA_v2 is used */
1374                event.ord = ep->ord;
1375                event.ird = ep->ird;
1376                event.private_data_len = ep->plen -
1377                        sizeof(struct mpa_v2_conn_params);
1378                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1379                        sizeof(struct mpa_v2_conn_params);
1380        } else {
1381                /* this means MPA_v1 is used. Send max supported */
1382                event.ord = cur_max_read_depth(ep->com.dev);
1383                event.ird = cur_max_read_depth(ep->com.dev);
1384                event.private_data_len = ep->plen;
1385                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1386        }
1387        c4iw_get_ep(&ep->com);
1388        ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1389                                                      &event);
1390        if (ret)
1391                c4iw_put_ep(&ep->com);
1392        set_bit(CONNREQ_UPCALL, &ep->com.history);
1393        c4iw_put_ep(&ep->parent_ep->com);
1394        return ret;
1395}
1396
1397static void established_upcall(struct c4iw_ep *ep)
1398{
1399        struct iw_cm_event event;
1400
1401        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1402        memset(&event, 0, sizeof(event));
1403        event.event = IW_CM_EVENT_ESTABLISHED;
1404        event.ird = ep->ord;
1405        event.ord = ep->ird;
1406        if (ep->com.cm_id) {
1407                pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1408                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1409                set_bit(ESTAB_UPCALL, &ep->com.history);
1410        }
1411}
1412
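     /*
      * Return RX credits to the hardware with a CPL_RX_DATA_ACK, folding in
      * any part of the receive window that could not be expressed in opt0
      * at connection setup, and (re)arming delayed-ack mode.
      */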
1413static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1414{
1415        struct sk_buff *skb;
1416        u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
1417        u32 credit_dack;
1418
1419        pr_debug("ep %p tid %u credits %u\n",
1420                 ep, ep->hwtid, credits);
1421        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1422        if (!skb) {
1423                pr_err("update_rx_credits - cannot alloc skb!\n");
1424                return 0;
1425        }
1426
1427        /*
1428         * If we couldn't specify the entire rcv window at connection setup
1429         * due to the limit in the number of bits in the RCV_BUFSIZ field,
1430         * then add the overage into the credits returned.
1431         */
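            /*
             * Illustrative example (numbers hypothetical): with a 1 MB
             * ep->rcv_win but only RCV_BUFSIZ_M KB encodable in opt0 at
             * connection setup, the remaining bytes are added to "credits"
             * here and announced to the peer in the RX_DATA_ACK built below.
             */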
1432        if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1433                credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
1434
1435        credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
1436                      RX_DACK_MODE_V(dack_mode);
1437
1438        cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
1439                            credit_dack);
1440
1441        c4iw_ofld_send(&ep->com.dev->rdev, skb);
1442        return credits;
1443}
1444
1445#define RELAXED_IRD_NEGOTIATION 1
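    /*
     * When RELAXED_IRD_NEGOTIATION is non-zero, process_mpa_reply() below
     * adjusts the local ird/ord to the responder's advertised values (within
     * the adapter's max_ordird_qp limit) rather than terminating the
     * connection with an insufficient-IRD error.
     */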
1446
1447/*
1448 * process_mpa_reply - process streaming mode MPA reply
1449 *
1450 * Returns:
1451 *
1452 * 0 upon success indicating a connect request was delivered to the ULP
1453 * or the mpa request is incomplete but valid so far.
1454 *
1455 * 1 if a failure requires the caller to close the connection.
1456 *
1457 * 2 if a failure requires the caller to abort the connection.
1458 */
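    /*
     * The caller, rx_data(), maps these return values onto
     * c4iw_ep_disconnect(): 1 requests a normal close, 2 an abortive one.
     */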
1459static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1460{
1461        struct mpa_message *mpa;
1462        struct mpa_v2_conn_params *mpa_v2_params;
1463        u16 plen;
1464        u16 resp_ird, resp_ord;
1465        u8 rtr_mismatch = 0, insuff_ird = 0;
1466        struct c4iw_qp_attributes attrs;
1467        enum c4iw_qp_attr_mask mask;
1468        int err;
1469        int disconnect = 0;
1470
1471        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1472
1473        /*
1474         * If we get more than the supported amount of private data
1475         * then we must fail this connection.
1476         */
1477        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1478                err = -EINVAL;
1479                goto err_stop_timer;
1480        }
1481
1482        /*
1483         * copy the new data into our accumulation buffer.
1484         */
1485        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1486                                  skb->len);
1487        ep->mpa_pkt_len += skb->len;
1488
1489        /*
1490         * if we don't even have the mpa message, then bail.
1491         */
1492        if (ep->mpa_pkt_len < sizeof(*mpa))
1493                return 0;
1494        mpa = (struct mpa_message *) ep->mpa_pkt;
1495
1496        /* Validate MPA header. */
1497        if (mpa->revision > mpa_rev) {
1498                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1499                       __func__, mpa_rev, mpa->revision);
1500                err = -EPROTO;
1501                goto err_stop_timer;
1502        }
1503        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1504                err = -EPROTO;
1505                goto err_stop_timer;
1506        }
1507
1508        plen = ntohs(mpa->private_data_size);
1509
1510        /*
1511         * Fail if there's too much private data.
1512         */
1513        if (plen > MPA_MAX_PRIVATE_DATA) {
1514                err = -EPROTO;
1515                goto err_stop_timer;
1516        }
1517
1518        /*
1519         * Fail if we have received more data than plen accounts for.
1520         */
1521        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1522                err = -EPROTO;
1523                goto err_stop_timer;
1524        }
1525
1526        ep->plen = (u8) plen;
1527
1528        /*
1529         * If we don't have all the pdata yet, then bail.
1530         * We'll continue processing when more data arrives.
1531         */
1532        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1533                return 0;
1534
1535        if (mpa->flags & MPA_REJECT) {
1536                err = -ECONNREFUSED;
1537                goto err_stop_timer;
1538        }
1539
1540        /*
1541         * Stop mpa timer.  If it expired, then
1542         * we ignore the MPA reply.  process_timeout()
1543         * will abort the connection.
1544         */
1545        if (stop_ep_timer(ep))
1546                return 0;
1547
1548        /*
1549         * If we get here we have accumulated the entire mpa
1550         * start reply message including private data. And
1551         * the MPA header is valid.
1552         */
1553        __state_set(&ep->com, FPDU_MODE);
1554        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1555        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1556        ep->mpa_attr.version = mpa->revision;
1557        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1558
1559        if (mpa->revision == 2) {
1560                ep->mpa_attr.enhanced_rdma_conn =
1561                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1562                if (ep->mpa_attr.enhanced_rdma_conn) {
1563                        mpa_v2_params = (struct mpa_v2_conn_params *)
1564                                (ep->mpa_pkt + sizeof(*mpa));
1565                        resp_ird = ntohs(mpa_v2_params->ird) &
1566                                MPA_V2_IRD_ORD_MASK;
1567                        resp_ord = ntohs(mpa_v2_params->ord) &
1568                                MPA_V2_IRD_ORD_MASK;
1569                        pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
1570                                 resp_ird, resp_ord, ep->ird, ep->ord);
1571
1572                        /*
1573                         * This is a double-check. Ideally the checks below
1574                         * are not required since the ird/ord negotiation has
1575                         * been taken care of in c4iw_accept_cr().
1576                         */
1577                        if (ep->ird < resp_ord) {
1578                                if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1579                                    ep->com.dev->rdev.lldi.max_ordird_qp)
1580                                        ep->ird = resp_ord;
1581                                else
1582                                        insuff_ird = 1;
1583                        } else if (ep->ird > resp_ord) {
1584                                ep->ird = resp_ord;
1585                        }
1586                        if (ep->ord > resp_ird) {
1587                                if (RELAXED_IRD_NEGOTIATION)
1588                                        ep->ord = resp_ird;
1589                                else
1590                                        insuff_ird = 1;
1591                        }
1592                        if (insuff_ird) {
1593                                err = -ENOMEM;
1594                                ep->ird = resp_ord;
1595                                ep->ord = resp_ird;
1596                        }
1597
1598                        if (ntohs(mpa_v2_params->ird) &
1599                                        MPA_V2_PEER2PEER_MODEL) {
1600                                if (ntohs(mpa_v2_params->ord) &
1601                                                MPA_V2_RDMA_WRITE_RTR)
1602                                        ep->mpa_attr.p2p_type =
1603                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1604                                else if (ntohs(mpa_v2_params->ord) &
1605                                                MPA_V2_RDMA_READ_RTR)
1606                                        ep->mpa_attr.p2p_type =
1607                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1608                        }
1609                }
1610        } else if (mpa->revision == 1)
1611                if (peer2peer)
1612                        ep->mpa_attr.p2p_type = p2p_type;
1613
1614        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
1615                 ep->mpa_attr.crc_enabled,
1616                 ep->mpa_attr.recv_marker_enabled,
1617                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1618                 ep->mpa_attr.p2p_type, p2p_type);
1619
1620        /*
1621         * If responder's RTR does not match with that of initiator, assign
1622         * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1623         * generated when moving QP to RTS state.
1624         * A TERM message will be sent after QP has moved to RTS state
1625         */
1626        if ((ep->mpa_attr.version == 2) && peer2peer &&
1627                        (ep->mpa_attr.p2p_type != p2p_type)) {
1628                ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1629                rtr_mismatch = 1;
1630        }
1631
1632        attrs.mpa_attr = ep->mpa_attr;
1633        attrs.max_ird = ep->ird;
1634        attrs.max_ord = ep->ord;
1635        attrs.llp_stream_handle = ep;
1636        attrs.next_state = C4IW_QP_STATE_RTS;
1637
1638        mask = C4IW_QP_ATTR_NEXT_STATE |
1639            C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1640            C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1641
1642        /* bind QP and TID with INIT_WR */
1643        err = c4iw_modify_qp(ep->com.qp->rhp,
1644                             ep->com.qp, mask, &attrs, 1);
1645        if (err)
1646                goto err;
1647
1648        /*
1649         * If responder's RTR requirement did not match with what initiator
1650         * supports, generate TERM message
1651         */
1652        if (rtr_mismatch) {
1653                pr_err("%s: RTR mismatch, sending TERM\n", __func__);
1654                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1655                attrs.ecode = MPA_NOMATCH_RTR;
1656                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1657                attrs.send_term = 1;
1658                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1659                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1660                err = -ENOMEM;
1661                disconnect = 1;
1662                goto out;
1663        }
1664
1665        /*
1666         * Generate TERM if initiator IRD is not sufficient for responder
1667         * provided ORD. Currently we behave the same way even when the
1668         * responder-provided IRD is insufficient for the initiator's
1669         * ORD.
1670         */
1671        if (insuff_ird) {
1672                pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
1673                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1674                attrs.ecode = MPA_INSUFF_IRD;
1675                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1676                attrs.send_term = 1;
1677                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1678                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1679                err = -ENOMEM;
1680                disconnect = 1;
1681                goto out;
1682        }
1683        goto out;
1684err_stop_timer:
1685        stop_ep_timer(ep);
1686err:
1687        disconnect = 2;
1688out:
1689        connect_reply_upcall(ep, err);
1690        return disconnect;
1691}
1692
1693/*
1694 * process_mpa_request - process streaming mode MPA request
1695 *
1696 * Returns:
1697 *
1698 * 0 upon success indicating a connect request was delivered to the ULP
1699 * or the mpa request is incomplete but valid so far.
1700 *
1701 * 1 if a failure requires the caller to close the connection.
1702 *
1703 * 2 if a failure requires the caller to abort the connection.
1704 */
1705static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1706{
1707        struct mpa_message *mpa;
1708        struct mpa_v2_conn_params *mpa_v2_params;
1709        u16 plen;
1710
1711        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1712
1713        /*
1714         * If we get more than the supported amount of private data
1715         * then we must fail this connection.
1716         */
1717        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
1718                goto err_stop_timer;
1719
1720        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1721
1722        /*
1723         * Copy the new data into our accumulation buffer.
1724         */
1725        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1726                                  skb->len);
1727        ep->mpa_pkt_len += skb->len;
1728
1729        /*
1730         * If we don't even have the mpa message, then bail.
1731         * We'll continue processing when more data arrives.
1732         */
1733        if (ep->mpa_pkt_len < sizeof(*mpa))
1734                return 0;
1735
1736        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1737        mpa = (struct mpa_message *) ep->mpa_pkt;
1738
1739        /*
1740         * Validate MPA Header.
1741         */
1742        if (mpa->revision > mpa_rev) {
1743                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1744                       __func__, mpa_rev, mpa->revision);
1745                goto err_stop_timer;
1746        }
1747
1748        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1749                goto err_stop_timer;
1750
1751        plen = ntohs(mpa->private_data_size);
1752
1753        /*
1754         * Fail if there's too much private data.
1755         */
1756        if (plen > MPA_MAX_PRIVATE_DATA)
1757                goto err_stop_timer;
1758
1759        /*
1760         * If plen does not account for pkt size
1761         * Fail if we have received more data than plen accounts for.
1762        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1763                goto err_stop_timer;
1764        ep->plen = (u8) plen;
1765
1766        /*
1767         * If we don't have all the pdata yet, then bail.
1768         */
1769        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1770                return 0;
1771
1772        /*
1773         * If we get here we have accumulated the entire mpa
1774         * start request message including private data.
1775         */
1776        ep->mpa_attr.initiator = 0;
1777        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1778        ep->mpa_attr.recv_marker_enabled = markers_enabled;
1779        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1780        ep->mpa_attr.version = mpa->revision;
1781        if (mpa->revision == 1)
1782                ep->tried_with_mpa_v1 = 1;
1783        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1784
1785        if (mpa->revision == 2) {
1786                ep->mpa_attr.enhanced_rdma_conn =
1787                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1788                if (ep->mpa_attr.enhanced_rdma_conn) {
1789                        mpa_v2_params = (struct mpa_v2_conn_params *)
1790                                (ep->mpa_pkt + sizeof(*mpa));
1791                        ep->ird = ntohs(mpa_v2_params->ird) &
1792                                MPA_V2_IRD_ORD_MASK;
1793                        ep->ird = min_t(u32, ep->ird,
1794                                        cur_max_read_depth(ep->com.dev));
1795                        ep->ord = ntohs(mpa_v2_params->ord) &
1796                                MPA_V2_IRD_ORD_MASK;
1797                        ep->ord = min_t(u32, ep->ord,
1798                                        cur_max_read_depth(ep->com.dev));
1799                        pr_debug("initiator ird %u ord %u\n",
1800                                 ep->ird, ep->ord);
1801                        if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1802                                if (peer2peer) {
1803                                        if (ntohs(mpa_v2_params->ord) &
1804                                                        MPA_V2_RDMA_WRITE_RTR)
1805                                                ep->mpa_attr.p2p_type =
1806                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1807                                        else if (ntohs(mpa_v2_params->ord) &
1808                                                        MPA_V2_RDMA_READ_RTR)
1809                                                ep->mpa_attr.p2p_type =
1810                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1811                                }
1812                }
1813        } else if (mpa->revision == 1)
1814                if (peer2peer)
1815                        ep->mpa_attr.p2p_type = p2p_type;
1816
1817        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
1818                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1819                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1820                 ep->mpa_attr.p2p_type);
1821
1822        __state_set(&ep->com, MPA_REQ_RCVD);
1823
1824        /* drive upcall */
1825        mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
1826        if (ep->parent_ep->com.state != DEAD) {
1827                if (connect_request_upcall(ep))
1828                        goto err_unlock_parent;
1829        } else {
1830                goto err_unlock_parent;
1831        }
1832        mutex_unlock(&ep->parent_ep->com.mutex);
1833        return 0;
1834
1835err_unlock_parent:
1836        mutex_unlock(&ep->parent_ep->com.mutex);
1837        goto err_out;
1838err_stop_timer:
1839        (void)stop_ep_timer(ep);
1840err_out:
1841        return 2;
1842}
1843
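    /*
     * rx_data() handles streaming-mode TCP payload (CPL_RX_DATA) received
     * before the connection enters RDMA mode.  Depending on the endpoint
     * state the bytes are fed to process_mpa_reply() (active side) or
     * process_mpa_request() (passive side); streaming data in FPDU_MODE is
     * unexpected and moves the QP to TERMINATE.
     */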
1844static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1845{
1846        struct c4iw_ep *ep;
1847        struct cpl_rx_data *hdr = cplhdr(skb);
1848        unsigned int dlen = ntohs(hdr->len);
1849        unsigned int tid = GET_TID(hdr);
1850        __u8 status = hdr->status;
1851        int disconnect = 0;
1852
1853        ep = get_ep_from_tid(dev, tid);
1854        if (!ep)
1855                return 0;
1856        pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
1857        skb_pull(skb, sizeof(*hdr));
1858        skb_trim(skb, dlen);
1859        mutex_lock(&ep->com.mutex);
1860
1861        switch (ep->com.state) {
1862        case MPA_REQ_SENT:
1863                update_rx_credits(ep, dlen);
1864                ep->rcv_seq += dlen;
1865                disconnect = process_mpa_reply(ep, skb);
1866                break;
1867        case MPA_REQ_WAIT:
1868                update_rx_credits(ep, dlen);
1869                ep->rcv_seq += dlen;
1870                disconnect = process_mpa_request(ep, skb);
1871                break;
1872        case FPDU_MODE: {
1873                struct c4iw_qp_attributes attrs;
1874
1875                update_rx_credits(ep, dlen);
1876                if (status)
1877                        pr_err("%s Unexpected streaming data." \
1878                               " qpid %u ep %p state %d tid %u status %d\n",
1879                               __func__, ep->com.qp->wq.sq.qid, ep,
1880                               ep->com.state, ep->hwtid, status);
1881                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1882                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1883                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1884                disconnect = 1;
1885                break;
1886        }
1887        default:
1888                break;
1889        }
1890        mutex_unlock(&ep->com.mutex);
1891        if (disconnect)
1892                c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
1893        c4iw_put_ep(&ep->com);
1894        return 0;
1895}
1896
1897static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
1898{
1899        enum chip_type adapter_type;
1900
1901        adapter_type = ep->com.dev->rdev.lldi.adapter_type;
1902
1903        /*
1904         * If this TCB had a srq buffer cached, then we must complete
1905         * it. For user mode, that means saving the srqidx in the
1906         * user/kernel status page for this qp.  For kernel mode, just
1907         * synthesize the CQE now.
1908         */
1909        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
1910                if (ep->com.qp->ibqp.uobject)
1911                        t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
1912                else
1913                        c4iw_flush_srqidx(ep->com.qp, srqidx);
1914        }
1915}
1916
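    /*
     * abort_rpl() processes the hardware's reply to an abort we requested.
     * In the ABORTING state this completes the teardown (DEAD plus release
     * of the endpoint resources); any cached SRQ buffer is completed first.
     */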
1917static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1918{
1919        u32 srqidx;
1920        struct c4iw_ep *ep;
1921        struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
1922        int release = 0;
1923        unsigned int tid = GET_TID(rpl);
1924
1925        ep = get_ep_from_tid(dev, tid);
1926        if (!ep) {
1927                pr_warn("Abort rpl to freed endpoint\n");
1928                return 0;
1929        }
1930
1931        if (ep->com.qp && ep->com.qp->srq) {
1932                srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
1933                complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
1934        }
1935
1936        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1937        mutex_lock(&ep->com.mutex);
1938        switch (ep->com.state) {
1939        case ABORTING:
1940                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
1941                __state_set(&ep->com, DEAD);
1942                release = 1;
1943                break;
1944        default:
1945                pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
1946                break;
1947        }
1948        mutex_unlock(&ep->com.mutex);
1949
1950        if (release) {
1951                close_complete_upcall(ep, -ECONNRESET);
1952                release_ep_resources(ep);
1953        }
1954        c4iw_put_ep(&ep->com);
1955        return 0;
1956}
1957
1958static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1959{
1960        struct sk_buff *skb;
1961        struct fw_ofld_connection_wr *req;
1962        unsigned int mtu_idx;
1963        u32 wscale;
1964        struct sockaddr_in *sin;
1965        int win;
1966
1967        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1968        req = __skb_put_zero(skb, sizeof(*req));
1969        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1970        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1971        req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1972                                     ep->com.dev->rdev.lldi.ports[0],
1973                                     ep->l2t));
1974        sin = (struct sockaddr_in *)&ep->com.local_addr;
1975        req->le.lport = sin->sin_port;
1976        req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1977        sin = (struct sockaddr_in *)&ep->com.remote_addr;
1978        req->le.pport = sin->sin_port;
1979        req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1980        req->tcb.t_state_to_astid =
1981                        htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1982                        FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1983        req->tcb.cplrxdataack_cplpassacceptrpl =
1984                        htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1985        req->tcb.tx_max = (__force __be32) jiffies;
1986        req->tcb.rcv_adv = htons(1);
1987        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1988                      enable_tcp_timestamps,
1989                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
1990        wscale = cxgb_compute_wscale(rcv_win);
1991
1992        /*
1993         * Specify the largest window that will fit in opt0. The
1994         * remainder will be specified in the rx_data_ack.
1995         */
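            /*
             * win is in 1KB units; whatever does not fit in the RCV_BUFSIZ
             * field is returned later as credits by update_rx_credits().
             */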
1996        win = ep->rcv_win >> 10;
1997        if (win > RCV_BUFSIZ_M)
1998                win = RCV_BUFSIZ_M;
1999
2000        req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
2001                (nocong ? NO_CONG_F : 0) |
2002                KEEP_ALIVE_F |
2003                DELACK_F |
2004                WND_SCALE_V(wscale) |
2005                MSS_IDX_V(mtu_idx) |
2006                L2T_IDX_V(ep->l2t->idx) |
2007                TX_CHAN_V(ep->tx_chan) |
2008                SMAC_SEL_V(ep->smac_idx) |
2009                DSCP_V(ep->tos >> 2) |
2010                ULP_MODE_V(ULP_MODE_TCPDDP) |
2011                RCV_BUFSIZ_V(win));
2012        req->tcb.opt2 = (__force __be32) (PACE_V(1) |
2013                TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
2014                RX_CHANNEL_V(0) |
2015                CCTRL_ECN_V(enable_ecn) |
2016                RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
2017        if (enable_tcp_timestamps)
2018                req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
2019        if (enable_tcp_sack)
2020                req->tcb.opt2 |= (__force __be32)SACK_EN_F;
2021        if (wscale && enable_tcp_window_scaling)
2022                req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
2023        req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
2024        req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
2025        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
2026        set_bit(ACT_OFLD_CONN, &ep->com.history);
2027        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2028}
2029
2030/*
2031 * Some of the error codes above implicitly indicate that there is no TID
2032 * allocated with the result of an ACT_OPEN.  We use this predicate to make
2033 * that explicit.
2034 */
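    /*
     * act_open_rpl() uses this predicate to decide whether cxgb4_remove_tid()
     * must be called when an active open fails (see the code after its
     * "fail" label).
     */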
2035static inline int act_open_has_tid(int status)
2036{
2037        return (status != CPL_ERR_TCAM_PARITY &&
2038                status != CPL_ERR_TCAM_MISS &&
2039                status != CPL_ERR_TCAM_FULL &&
2040                status != CPL_ERR_CONN_EXIST_SYNRECV &&
2041                status != CPL_ERR_CONN_EXIST);
2042}
2043
2044static char *neg_adv_str(unsigned int status)
2045{
2046        switch (status) {
2047        case CPL_ERR_RTX_NEG_ADVICE:
2048                return "Retransmit timeout";
2049        case CPL_ERR_PERSIST_NEG_ADVICE:
2050                return "Persist timeout";
2051        case CPL_ERR_KEEPALV_NEG_ADVICE:
2052                return "Keepalive timeout";
2053        default:
2054                return "Unknown";
2055        }
2056}
2057
2058static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
2059{
2060        ep->snd_win = snd_win;
2061        ep->rcv_win = rcv_win;
2062        pr_debug("snd_win %d rcv_win %d\n",
2063                 ep->snd_win, ep->rcv_win);
2064}
2065
2066#define ACT_OPEN_RETRY_COUNT 2
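    /*
     * On CPL_ERR_CONN_EXIST, act_open_rpl() releases the old atid, dst and
     * l2t entry and retries the active open via c4iw_reconnect(), up to
     * ACT_OPEN_RETRY_COUNT times.
     */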
2067
2068static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
2069                     struct dst_entry *dst, struct c4iw_dev *cdev,
2070                     bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
2071{
2072        struct neighbour *n;
2073        int err, step;
2074        struct net_device *pdev;
2075
2076        n = dst_neigh_lookup(dst, peer_ip);
2077        if (!n)
2078                return -ENODEV;
2079
2080        rcu_read_lock();
2081        err = -ENOMEM;
2082        if (n->dev->flags & IFF_LOOPBACK) {
2083                if (iptype == 4)
2084                        pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
2085                else if (IS_ENABLED(CONFIG_IPV6))
2086                        for_each_netdev(&init_net, pdev) {
2087                                if (ipv6_chk_addr(&init_net,
2088                                                  (struct in6_addr *)peer_ip,
2089                                                  pdev, 1))
2090                                        break;
2091                        }
2092                else
2093                        pdev = NULL;
2094
2095                if (!pdev) {
2096                        err = -ENODEV;
2097                        goto out;
2098                }
2099                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2100                                        n, pdev, rt_tos2priority(tos));
2101                if (!ep->l2t) {
2102                        dev_put(pdev);
2103                        goto out;
2104                }
2105                ep->mtu = pdev->mtu;
2106                ep->tx_chan = cxgb4_port_chan(pdev);
2107                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2108                step = cdev->rdev.lldi.ntxq /
2109                        cdev->rdev.lldi.nchan;
2110                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2111                step = cdev->rdev.lldi.nrxq /
2112                        cdev->rdev.lldi.nchan;
2113                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2114                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2115                        cxgb4_port_idx(pdev) * step];
2116                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2117                dev_put(pdev);
2118        } else {
2119                pdev = get_real_dev(n->dev);
2120                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2121                                        n, pdev, rt_tos2priority(tos));
2122                if (!ep->l2t)
2123                        goto out;
2124                ep->mtu = dst_mtu(dst);
2125                ep->tx_chan = cxgb4_port_chan(pdev);
2126                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2127                step = cdev->rdev.lldi.ntxq /
2128                        cdev->rdev.lldi.nchan;
2129                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2130                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2131                step = cdev->rdev.lldi.nrxq /
2132                        cdev->rdev.lldi.nchan;
2133                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2134                        cxgb4_port_idx(pdev) * step];
2135                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2136
2137                if (clear_mpa_v1) {
2138                        ep->retry_with_mpa_v1 = 0;
2139                        ep->tried_with_mpa_v1 = 0;
2140                }
2141        }
2142        err = 0;
2143out:
2144        rcu_read_unlock();
2145
2146        neigh_release(n);
2147
2148        return err;
2149}
2150
2151static int c4iw_reconnect(struct c4iw_ep *ep)
2152{
2153        int err = 0;
2154        int size = 0;
2155        struct sockaddr_in *laddr = (struct sockaddr_in *)
2156                                    &ep->com.cm_id->m_local_addr;
2157        struct sockaddr_in *raddr = (struct sockaddr_in *)
2158                                    &ep->com.cm_id->m_remote_addr;
2159        struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
2160                                      &ep->com.cm_id->m_local_addr;
2161        struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
2162                                      &ep->com.cm_id->m_remote_addr;
2163        int iptype;
2164        __u8 *ra;
2165
2166        pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id);
2167        c4iw_init_wr_wait(ep->com.wr_waitp);
2168
2169        /* When the MPA revision differs between the nodes, the node with
2170         * MPA_rev=2 retries the connection with MPA_rev 1 for the same EP via
2171         * c4iw_reconnect(), where the EP is assigned a new tid for the new
2172         * connection attempt. Because the same EP pointer is reused for the
2173         * reconnect, some skbs were already consumed by the previous
2174         * c4iw_connect(), leaving the EP with too few skbs for
2175         * c4iw_reconnect() and causing a crash due to an empty skb list during
2176         * peer_abort(). Re-allocate the skbs that have already been consumed.
2177         */
2178        size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
2179        if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
2180                err = -ENOMEM;
2181                goto fail1;
2182        }
2183
2184        /*
2185         * Allocate an active TID to initiate a TCP connection.
2186         */
2187        ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
2188        if (ep->atid == -1) {
2189                pr_err("%s - cannot alloc atid\n", __func__);
2190                err = -ENOMEM;
2191                goto fail2;
2192        }
2193        err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
2194        if (err)
2195                goto fail2a;
2196
2197        /* find a route */
2198        if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2199                ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
2200                                          laddr->sin_addr.s_addr,
2201                                          raddr->sin_addr.s_addr,
2202                                          laddr->sin_port,
2203                                          raddr->sin_port, ep->com.cm_id->tos);
2204                iptype = 4;
2205                ra = (__u8 *)&raddr->sin_addr;
2206        } else {
2207                ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
2208                                           get_real_dev,
2209                                           laddr6->sin6_addr.s6_addr,
2210                                           raddr6->sin6_addr.s6_addr,
2211                                           laddr6->sin6_port,
2212                                           raddr6->sin6_port,
2213                                           ep->com.cm_id->tos,
2214                                           raddr6->sin6_scope_id);
2215                iptype = 6;
2216                ra = (__u8 *)&raddr6->sin6_addr;
2217        }
2218        if (!ep->dst) {
2219                pr_err("%s - cannot find route\n", __func__);
2220                err = -EHOSTUNREACH;
2221                goto fail3;
2222        }
2223        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2224                        ep->com.dev->rdev.lldi.adapter_type,
2225                        ep->com.cm_id->tos);
2226        if (err) {
2227                pr_err("%s - cannot alloc l2e\n", __func__);
2228                goto fail4;
2229        }
2230
2231        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2232                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2233                 ep->l2t->idx);
2234
2235        state_set(&ep->com, CONNECTING);
2236        ep->tos = ep->com.cm_id->tos;
2237
2238        /* send connect request to rnic */
2239        err = send_connect(ep);
2240        if (!err)
2241                goto out;
2242
2243        cxgb4_l2t_release(ep->l2t);
2244fail4:
2245        dst_release(ep->dst);
2246fail3:
2247        xa_erase_irq(&ep->com.dev->atids, ep->atid);
2248fail2a:
2249        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2250fail2:
2251        /*
2252         * Remember to send a notification to the upper layer.  We got here
2253         * during a reconnect attempt, which the upper layer is unaware of,
2254         * so it is still waiting for the response to its first connect
2255         * request.
2256         */
2257        connect_reply_upcall(ep, -ECONNRESET);
2258fail1:
2259        c4iw_put_ep(&ep->com);
2260out:
2261        return err;
2262}
2263
2264static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2265{
2266        struct c4iw_ep *ep;
2267        struct cpl_act_open_rpl *rpl = cplhdr(skb);
2268        unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2269                                      ntohl(rpl->atid_status)));
2270        struct tid_info *t = dev->rdev.lldi.tids;
2271        int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2272        struct sockaddr_in *la;
2273        struct sockaddr_in *ra;
2274        struct sockaddr_in6 *la6;
2275        struct sockaddr_in6 *ra6;
2276        int ret = 0;
2277
2278        ep = lookup_atid(t, atid);
2279        la = (struct sockaddr_in *)&ep->com.local_addr;
2280        ra = (struct sockaddr_in *)&ep->com.remote_addr;
2281        la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2282        ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2283
2284        pr_debug("ep %p atid %u status %u errno %d\n", ep, atid,
2285                 status, status2errno(status));
2286
2287        if (cxgb_is_neg_adv(status)) {
2288                pr_debug("Connection problems for atid %u status %u (%s)\n",
2289                         atid, status, neg_adv_str(status));
2290                ep->stats.connect_neg_adv++;
2291                mutex_lock(&dev->rdev.stats.lock);
2292                dev->rdev.stats.neg_adv++;
2293                mutex_unlock(&dev->rdev.stats.lock);
2294                return 0;
2295        }
2296
2297        set_bit(ACT_OPEN_RPL, &ep->com.history);
2298
2299        /*
2300         * Log interesting failures.
2301         */
2302        switch (status) {
2303        case CPL_ERR_CONN_RESET:
2304        case CPL_ERR_CONN_TIMEDOUT:
2305                break;
2306        case CPL_ERR_TCAM_FULL:
2307                mutex_lock(&dev->rdev.stats.lock);
2308                dev->rdev.stats.tcam_full++;
2309                mutex_unlock(&dev->rdev.stats.lock);
2310                if (ep->com.local_addr.ss_family == AF_INET &&
2311                    dev->rdev.lldi.enable_fw_ofld_conn) {
2312                        ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
2313                                                   ntohl(rpl->atid_status))));
2314                        if (ret)
2315                                goto fail;
2316                        return 0;
2317                }
2318                break;
2319        case CPL_ERR_CONN_EXIST:
2320                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2321                        set_bit(ACT_RETRY_INUSE, &ep->com.history);
2322                        if (ep->com.remote_addr.ss_family == AF_INET6) {
2323                                struct sockaddr_in6 *sin6 =
2324                                                (struct sockaddr_in6 *)
2325                                                &ep->com.local_addr;
2326                                cxgb4_clip_release(
2327                                                ep->com.dev->rdev.lldi.ports[0],
2328                                                (const u32 *)
2329                                                &sin6->sin6_addr.s6_addr, 1);
2330                        }
2331                        xa_erase_irq(&ep->com.dev->atids, atid);
2332                        cxgb4_free_atid(t, atid);
2333                        dst_release(ep->dst);
2334                        cxgb4_l2t_release(ep->l2t);
2335                        c4iw_reconnect(ep);
2336                        return 0;
2337                }
2338                break;
2339        default:
2340                if (ep->com.local_addr.ss_family == AF_INET) {
2341                        pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2342                                atid, status, status2errno(status),
2343                                &la->sin_addr.s_addr, ntohs(la->sin_port),
2344                                &ra->sin_addr.s_addr, ntohs(ra->sin_port));
2345                } else {
2346                        pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2347                                atid, status, status2errno(status),
2348                                la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2349                                ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2350                }
2351                break;
2352        }
2353
2354fail:
2355        connect_reply_upcall(ep, status2errno(status));
2356        state_set(&ep->com, DEAD);
2357
2358        if (ep->com.remote_addr.ss_family == AF_INET6) {
2359                struct sockaddr_in6 *sin6 =
2360                        (struct sockaddr_in6 *)&ep->com.local_addr;
2361                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2362                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2363        }
2364        if (status && act_open_has_tid(status))
2365                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
2366                                 ep->com.local_addr.ss_family);
2367
2368        xa_erase_irq(&ep->com.dev->atids, atid);
2369        cxgb4_free_atid(t, atid);
2370        dst_release(ep->dst);
2371        cxgb4_l2t_release(ep->l2t);
2372        c4iw_put_ep(&ep->com);
2373
2374        return 0;
2375}
2376
2377static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2378{
2379        struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2380        unsigned int stid = GET_TID(rpl);
2381        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2382
2383        if (!ep) {
2384                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2385                goto out;
2386        }
2387        pr_debug("ep %p status %d error %d\n", ep,
2388                 rpl->status, status2errno(rpl->status));
2389        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2390        c4iw_put_ep(&ep->com);
2391out:
2392        return 0;
2393}
2394
2395static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2396{
2397        struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2398        unsigned int stid = GET_TID(rpl);
2399        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2400
2401        if (!ep) {
2402                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2403                goto out;
2404        }
2405        pr_debug("ep %p\n", ep);
2406        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2407        c4iw_put_ep(&ep->com);
2408out:
2409        return 0;
2410}
2411
2412static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2413                     struct cpl_pass_accept_req *req)
2414{
2415        struct cpl_pass_accept_rpl *rpl;
2416        unsigned int mtu_idx;
2417        u64 opt0;
2418        u32 opt2;
2419        u32 wscale;
2420        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2421        int win;
2422        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2423
2424        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2425        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2426                      enable_tcp_timestamps && req->tcpopt.tstamp,
2427                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
2428        wscale = cxgb_compute_wscale(rcv_win);
2429
2430        /*
2431         * Specify the largest window that will fit in opt0. The
2432         * remainder will be specified in the rx_data_ack.
2433         */
2434        win = ep->rcv_win >> 10;
2435        if (win > RCV_BUFSIZ_M)
2436                win = RCV_BUFSIZ_M;
2437        opt0 = (nocong ? NO_CONG_F : 0) |
2438               KEEP_ALIVE_F |
2439               DELACK_F |
2440               WND_SCALE_V(wscale) |
2441               MSS_IDX_V(mtu_idx) |
2442               L2T_IDX_V(ep->l2t->idx) |
2443               TX_CHAN_V(ep->tx_chan) |
2444               SMAC_SEL_V(ep->smac_idx) |
2445               DSCP_V(ep->tos >> 2) |
2446               ULP_MODE_V(ULP_MODE_TCPDDP) |
2447               RCV_BUFSIZ_V(win);
2448        opt2 = RX_CHANNEL_V(0) |
2449               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2450
2451        if (enable_tcp_timestamps && req->tcpopt.tstamp)
2452                opt2 |= TSTAMPS_EN_F;
2453        if (enable_tcp_sack && req->tcpopt.sack)
2454                opt2 |= SACK_EN_F;
2455        if (wscale && enable_tcp_window_scaling)
2456                opt2 |= WND_SCALE_EN_F;
2457        if (enable_ecn) {
2458                const struct tcphdr *tcph;
2459                u32 hlen = ntohl(req->hdr_len);
2460
2461                if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2462                        tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2463                                IP_HDR_LEN_G(hlen);
2464                else
2465                        tcph = (const void *)(req + 1) +
2466                                T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2467                if (tcph->ece && tcph->cwr)
2468                        opt2 |= CCTRL_ECN_V(1);
2469        }
2470
2471        skb_get(skb);
2472        rpl = cplhdr(skb);
2473        if (!is_t4(adapter_type)) {
2474                skb_trim(skb, roundup(sizeof(*rpl5), 16));
2475                rpl5 = (void *)rpl;
2476                INIT_TP_WR(rpl5, ep->hwtid);
2477        } else {
2478                skb_trim(skb, sizeof(*rpl));
2479                INIT_TP_WR(rpl, ep->hwtid);
2480        }
2481        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
2482                                                    ep->hwtid));
2483
2484        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
2485                u32 isn = (prandom_u32() & ~7UL) - 1;
2486                opt2 |= T5_OPT_2_VALID_F;
2487                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2488                opt2 |= T5_ISS_F;
2489                rpl5 = (void *)rpl;
2490                memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
2491                if (peer2peer)
2492                        isn += 4;
2493                rpl5->iss = cpu_to_be32(isn);
2494                pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
2495        }
2496
2497        rpl->opt0 = cpu_to_be64(opt0);
2498        rpl->opt2 = cpu_to_be32(opt2);
2499        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2500        t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
2501
2502        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2503}
2504
2505static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2506{
2507        pr_debug("c4iw_dev %p tid %u\n", dev, hwtid);
2508        skb_trim(skb, sizeof(struct cpl_tid_release));
2509        release_tid(&dev->rdev, hwtid, skb);
2510        return;
2511}
2512
2513static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2514{
2515        struct c4iw_ep *child_ep = NULL, *parent_ep;
2516        struct cpl_pass_accept_req *req = cplhdr(skb);
2517        unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2518        struct tid_info *t = dev->rdev.lldi.tids;
2519        unsigned int hwtid = GET_TID(req);
2520        struct dst_entry *dst;
2521        __u8 local_ip[16], peer_ip[16];
2522        __be16 local_port, peer_port;
2523        struct sockaddr_in6 *sin6;
2524        int err;
2525        u16 peer_mss = ntohs(req->tcpopt.mss);
2526        int iptype;
2527        unsigned short hdrs;
2528        u8 tos;
2529
2530        parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
2531        if (!parent_ep) {
2532                pr_err("%s connect request on invalid stid %d\n",
2533                       __func__, stid);
2534                goto reject;
2535        }
2536
2537        if (state_read(&parent_ep->com) != LISTEN) {
2538                pr_err("%s - listening ep not in LISTEN\n", __func__);
2539                goto reject;
2540        }
2541
2542        if (parent_ep->com.cm_id->tos_set)
2543                tos = parent_ep->com.cm_id->tos;
2544        else
2545                tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2546
2547        cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
2548                        &iptype, local_ip, peer_ip, &local_port, &peer_port);
2549
2550        /* Find output route */
2551        if (iptype == 4)  {
2552                pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
2553                         , parent_ep, hwtid,
2554                         local_ip, peer_ip, ntohs(local_port),
2555                         ntohs(peer_port), peer_mss);
2556                dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
2557                                      *(__be32 *)local_ip, *(__be32 *)peer_ip,
2558                                      local_port, peer_port, tos);
2559        } else {
2560                pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
2561                         , parent_ep, hwtid,
2562                         local_ip, peer_ip, ntohs(local_port),
2563                         ntohs(peer_port), peer_mss);
2564                dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
2565                                local_ip, peer_ip, local_port, peer_port,
2566                                tos,
2567                                ((struct sockaddr_in6 *)
2568                                 &parent_ep->com.local_addr)->sin6_scope_id);
2569        }
2570        if (!dst) {
2571                pr_err("%s - failed to find dst entry!\n", __func__);
2572                goto reject;
2573        }
2574
2575        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2576        if (!child_ep) {
2577                pr_err("%s - failed to allocate ep entry!\n", __func__);
2578                dst_release(dst);
2579                goto reject;
2580        }
2581
2582        err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2583                        parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2584        if (err) {
2585                pr_err("%s - failed to allocate l2t entry!\n", __func__);
2586                dst_release(dst);
2587                kfree(child_ep);
2588                goto reject;
2589        }
2590
2591        hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
2592               sizeof(struct tcphdr) +
2593               ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2594        if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2595                child_ep->mtu = peer_mss + hdrs;
2596
2597        skb_queue_head_init(&child_ep->com.ep_skb_list);
2598        if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
2599                goto fail;
2600
2601        state_set(&child_ep->com, CONNECTING);
2602        child_ep->com.dev = dev;
2603        child_ep->com.cm_id = NULL;
2604
2605        if (iptype == 4) {
2606                struct sockaddr_in *sin = (struct sockaddr_in *)
2607                        &child_ep->com.local_addr;
2608
2609                sin->sin_family = AF_INET;
2610                sin->sin_port = local_port;
2611                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2612
2613                sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2614                sin->sin_family = AF_INET;
2615                sin->sin_port = ((struct sockaddr_in *)
2616                                 &parent_ep->com.local_addr)->sin_port;
2617                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2618
2619                sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2620                sin->sin_family = AF_INET;
2621                sin->sin_port = peer_port;
2622                sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2623        } else {
2624                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2625                sin6->sin6_family = PF_INET6;
2626                sin6->sin6_port = local_port;
2627                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2628
2629                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2630                sin6->sin6_family = PF_INET6;
2631                sin6->sin6_port = ((struct sockaddr_in6 *)
2632                                   &parent_ep->com.local_addr)->sin6_port;
2633                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2634
2635                sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2636                sin6->sin6_family = PF_INET6;
2637                sin6->sin6_port = peer_port;
2638                memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2639        }
2640
2641        c4iw_get_ep(&parent_ep->com);
2642        child_ep->parent_ep = parent_ep;
2643        child_ep->tos = tos;
2644        child_ep->dst = dst;
2645        child_ep->hwtid = hwtid;
2646
2647        pr_debug("tx_chan %u smac_idx %u rss_qid %u\n",
2648                 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2649
2650        timer_setup(&child_ep->timer, ep_timeout, 0);
2651        cxgb4_insert_tid(t, child_ep, hwtid,
2652                         child_ep->com.local_addr.ss_family);
2653        insert_ep_tid(child_ep);
2654        if (accept_cr(child_ep, skb, req)) {
2655                c4iw_put_ep(&parent_ep->com);
2656                release_ep_resources(child_ep);
2657        } else {
2658                set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2659        }
2660        if (iptype == 6) {
2661                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2662                cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2663                               (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2664        }
2665        goto out;
2666fail:
2667        c4iw_put_ep(&child_ep->com);
2668reject:
2669        reject_cr(dev, hwtid, skb);
2670out:
2671        if (parent_ep)
2672                c4iw_put_ep(&parent_ep->com);
2673        return 0;
2674}
2675
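    /*
     * pass_establish() runs when the passive-side TCP connection is fully
     * established: record the ISNs and window scale, compute the emss,
     * start the MPA-request timer and send the flowc work request.
     */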
2676static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2677{
2678        struct c4iw_ep *ep;
2679        struct cpl_pass_establish *req = cplhdr(skb);
2680        unsigned int tid = GET_TID(req);
2681        int ret;
2682        u16 tcp_opt = ntohs(req->tcp_opt);
2683
2684        ep = get_ep_from_tid(dev, tid);
2685        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2686        ep->snd_seq = be32_to_cpu(req->snd_isn);
2687        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2688        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
2689
2690        pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
2691
2692        set_emss(ep, tcp_opt);
2693
2694        dst_confirm(ep->dst);
2695        mutex_lock(&ep->com.mutex);
2696        ep->com.state = MPA_REQ_WAIT;
2697        start_ep_timer(ep);
2698        set_bit(PASS_ESTAB, &ep->com.history);
2699        ret = send_flowc(ep);
2700        mutex_unlock(&ep->com.mutex);
2701        if (ret)
2702                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2703        c4iw_put_ep(&ep->com);
2704
2705        return 0;
2706}
2707
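    /*
     * peer_close() handles a FIN from the peer.  Depending on the endpoint
     * state it wakes any waiters, issues the appropriate upcall (connect
     * reply, peer close or close complete), moves the QP toward CLOSING or
     * IDLE, and releases the endpoint once the close handshake completes.
     */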
2708static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2709{
2710        struct cpl_peer_close *hdr = cplhdr(skb);
2711        struct c4iw_ep *ep;
2712        struct c4iw_qp_attributes attrs;
2713        int disconnect = 1;
2714        int release = 0;
2715        unsigned int tid = GET_TID(hdr);
2716        int ret;
2717
2718        ep = get_ep_from_tid(dev, tid);
2719        if (!ep)
2720                return 0;
2721
2722        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2723        dst_confirm(ep->dst);
2724
2725        set_bit(PEER_CLOSE, &ep->com.history);
2726        mutex_lock(&ep->com.mutex);
2727        switch (ep->com.state) {
2728        case MPA_REQ_WAIT:
2729                __state_set(&ep->com, CLOSING);
2730                break;
2731        case MPA_REQ_SENT:
2732                __state_set(&ep->com, CLOSING);
2733                connect_reply_upcall(ep, -ECONNRESET);
2734                break;
2735        case MPA_REQ_RCVD:
2736
2737                /*
2738                 * We're gonna mark this puppy DEAD, but keep
2739                 * the reference on it until the ULP accepts or
2740                 * rejects the CR. Also wake up anyone waiting
2741                 * in rdma connection migration (see c4iw_accept_cr()).
2742                 */
2743                __state_set(&ep->com, CLOSING);
2744                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2745                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2746                break;
2747        case MPA_REP_SENT:
2748                __state_set(&ep->com, CLOSING);
2749                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2750                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2751                break;
2752        case FPDU_MODE:
2753                start_ep_timer(ep);
2754                __state_set(&ep->com, CLOSING);
2755                attrs.next_state = C4IW_QP_STATE_CLOSING;
2756                ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2757                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2758                if (ret != -ECONNRESET) {
2759                        peer_close_upcall(ep);
2760                        disconnect = 1;
2761                }
2762                break;
2763        case ABORTING:
2764                disconnect = 0;
2765                break;
2766        case CLOSING:
2767                __state_set(&ep->com, MORIBUND);
2768                disconnect = 0;
2769                break;
2770        case MORIBUND:
2771                (void)stop_ep_timer(ep);
2772                if (ep->com.cm_id && ep->com.qp) {
2773                        attrs.next_state = C4IW_QP_STATE_IDLE;
2774                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2775                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2776                }
2777                close_complete_upcall(ep, 0);
2778                __state_set(&ep->com, DEAD);
2779                release = 1;
2780                disconnect = 0;
2781                break;
2782        case DEAD:
2783                disconnect = 0;
2784                break;
2785        default:
2786                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2787        }
2788        mutex_unlock(&ep->com.mutex);
2789        if (disconnect)
2790                c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2791        if (release)
2792                release_ep_resources(ep);
2793        c4iw_put_ep(&ep->com);
2794        return 0;
2795}
2796
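/*
 * Deferred tail of peer_abort() for SRQ connections: when the incoming
 * abort carries a zero SRQ index, peer_abort() sets
 * PEER_ABORT_IN_PROGRESS and issues read_tcb().  Once read_tcb_rpl()
 * has recovered the TCB's rq_start into ep->srqe_idx it calls back in
 * here to flush the cached SRQ buffers, move the QP to ERROR, signal
 * the peer abort upcall and release the endpoint.
 */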
2797static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
2798{
2799        complete_cached_srq_buffers(ep, ep->srqe_idx);
2800        if (ep->com.cm_id && ep->com.qp) {
2801                struct c4iw_qp_attributes attrs;
2802
2803                attrs.next_state = C4IW_QP_STATE_ERROR;
2804                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2805                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2806        }
2807        peer_abort_upcall(ep);
2808        release_ep_resources(ep);
2809        c4iw_put_ep(&ep->com);
2810}
2811
2812static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2813{
2814        struct cpl_abort_req_rss6 *req = cplhdr(skb);
2815        struct c4iw_ep *ep;
2816        struct sk_buff *rpl_skb;
2817        struct c4iw_qp_attributes attrs;
2818        int ret;
2819        int release = 0;
2820        unsigned int tid = GET_TID(req);
2821        u8 status;
2822        u32 srqidx;
2823
2824        u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
2825
2826        ep = get_ep_from_tid(dev, tid);
2827        if (!ep)
2828                return 0;
2829
2830        status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
2831
2832        if (cxgb_is_neg_adv(status)) {
2833                pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
2834                         ep->hwtid, status, neg_adv_str(status));
2835                ep->stats.abort_neg_adv++;
2836                mutex_lock(&dev->rdev.stats.lock);
2837                dev->rdev.stats.neg_adv++;
2838                mutex_unlock(&dev->rdev.stats.lock);
2839                goto deref_ep;
2840        }
2841
2842        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
2843                 ep->com.state);
2844        set_bit(PEER_ABORT, &ep->com.history);
2845
2846        /*
2847         * Wake up any threads in rdma_init() or rdma_fini().
2848         * However, this is not needed if the com state is
2849         * MPA_REQ_SENT.
2850         */
2851        if (ep->com.state != MPA_REQ_SENT)
2852                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2853
2854        mutex_lock(&ep->com.mutex);
2855        switch (ep->com.state) {
2856        case CONNECTING:
2857                c4iw_put_ep(&ep->parent_ep->com);
2858                break;
2859        case MPA_REQ_WAIT:
2860                (void)stop_ep_timer(ep);
2861                break;
2862        case MPA_REQ_SENT:
2863                (void)stop_ep_timer(ep);
2864                if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
2865                    (mpa_rev == 2 && ep->tried_with_mpa_v1))
2866                        connect_reply_upcall(ep, -ECONNRESET);
2867                else {
2868                        /*
2869                         * We just don't send the notification upwards because we
2870                         * want to retry with mpa_v1 without the upper layers even
2871                         * knowing it.
2872                         *
2873                         * Do some housekeeping so as to re-initiate the
2874                         * connection.
2875                         */
2876                        pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
2877                                __func__, mpa_rev);
2878                        ep->retry_with_mpa_v1 = 1;
2879                }
2880                break;
2881        case MPA_REP_SENT:
2882                break;
2883        case MPA_REQ_RCVD:
2884                break;
2885        case MORIBUND:
2886        case CLOSING:
2887                stop_ep_timer(ep);
2888                /*FALLTHROUGH*/
2889        case FPDU_MODE:
2890                if (ep->com.qp && ep->com.qp->srq) {
2891                        srqidx = ABORT_RSS_SRQIDX_G(
2892                                        be32_to_cpu(req->srqidx_status));
2893                        if (srqidx) {
2894                                complete_cached_srq_buffers(ep,
2895                                                            srqidx);
2896                        } else {
2897                                /* Hold ep ref until finish_peer_abort() */
2898                                c4iw_get_ep(&ep->com);
2899                                __state_set(&ep->com, ABORTING);
2900                                set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
2901                                read_tcb(ep);
2902                                break;
2903
2904                        }
2905                }
2906
2907                if (ep->com.cm_id && ep->com.qp) {
2908                        attrs.next_state = C4IW_QP_STATE_ERROR;
2909                        ret = c4iw_modify_qp(ep->com.qp->rhp,
2910                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2911                                     &attrs, 1);
2912                        if (ret)
2913                                pr_err("%s - qp <- error failed!\n", __func__);
2914                }
2915                peer_abort_upcall(ep);
2916                break;
2917        case ABORTING:
2918                break;
2919        case DEAD:
2920                pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2921                mutex_unlock(&ep->com.mutex);
2922                goto deref_ep;
2923        default:
2924                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2925                break;
2926        }
2927        dst_confirm(ep->dst);
2928        if (ep->com.state != ABORTING) {
2929                __state_set(&ep->com, DEAD);
2930                /* we don't release if we want to retry with mpa_v1 */
2931                if (!ep->retry_with_mpa_v1)
2932                        release = 1;
2933        }
2934        mutex_unlock(&ep->com.mutex);
2935
2936        rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
2937        if (WARN_ON(!rpl_skb)) {
2938                release = 1;
2939                goto out;
2940        }
2941
2942        cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
2943
2944        c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2945out:
2946        if (release)
2947                release_ep_resources(ep);
2948        else if (ep->retry_with_mpa_v1) {
2949                if (ep->com.remote_addr.ss_family == AF_INET6) {
2950                        struct sockaddr_in6 *sin6 =
2951                                        (struct sockaddr_in6 *)
2952                                        &ep->com.local_addr;
2953                        cxgb4_clip_release(
2954                                        ep->com.dev->rdev.lldi.ports[0],
2955                                        (const u32 *)&sin6->sin6_addr.s6_addr,
2956                                        1);
2957                }
2958                xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
2959                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
2960                                 ep->com.local_addr.ss_family);
2961                dst_release(ep->dst);
2962                cxgb4_l2t_release(ep->l2t);
2963                c4iw_reconnect(ep);
2964        }
2965
2966deref_ep:
2967        c4iw_put_ep(&ep->com);
2968        /* Dereferencing ep, referenced in peer_abort_intr() */
2969        c4iw_put_ep(&ep->com);
2970        return 0;
2971}
2972
2973static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2974{
2975        struct c4iw_ep *ep;
2976        struct c4iw_qp_attributes attrs;
2977        struct cpl_close_con_rpl *rpl = cplhdr(skb);
2978        int release = 0;
2979        unsigned int tid = GET_TID(rpl);
2980
2981        ep = get_ep_from_tid(dev, tid);
2982        if (!ep)
2983                return 0;
2984
2985        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2986
2987        /* The cm_id may be null if we failed to connect */
2988        mutex_lock(&ep->com.mutex);
2989        set_bit(CLOSE_CON_RPL, &ep->com.history);
2990        switch (ep->com.state) {
2991        case CLOSING:
2992                __state_set(&ep->com, MORIBUND);
2993                break;
2994        case MORIBUND:
2995                (void)stop_ep_timer(ep);
2996                if ((ep->com.cm_id) && (ep->com.qp)) {
2997                        attrs.next_state = C4IW_QP_STATE_IDLE;
2998                        c4iw_modify_qp(ep->com.qp->rhp,
2999                                             ep->com.qp,
3000                                             C4IW_QP_ATTR_NEXT_STATE,
3001                                             &attrs, 1);
3002                }
3003                close_complete_upcall(ep, 0);
3004                __state_set(&ep->com, DEAD);
3005                release = 1;
3006                break;
3007        case ABORTING:
3008        case DEAD:
3009                break;
3010        default:
3011                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3012                break;
3013        }
3014        mutex_unlock(&ep->com.mutex);
3015        if (release)
3016                release_ep_resources(ep);
3017        c4iw_put_ep(&ep->com);
3018        return 0;
3019}
3020
3021static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
3022{
3023        struct cpl_rdma_terminate *rpl = cplhdr(skb);
3024        unsigned int tid = GET_TID(rpl);
3025        struct c4iw_ep *ep;
3026        struct c4iw_qp_attributes attrs;
3027
3028        ep = get_ep_from_tid(dev, tid);
3029
3030        if (ep) {
3031                if (ep->com.qp) {
3032                        pr_warn("TERM received tid %u qpid %u\n", tid,
3033                                ep->com.qp->wq.sq.qid);
3034                        attrs.next_state = C4IW_QP_STATE_TERMINATE;
3035                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
3036                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
3037                }
3038
3039                c4iw_put_ep(&ep->com);
3040        } else
3041                pr_warn("TERM received tid %u no ep/qp\n", tid);
3042
3043        return 0;
3044}
3045
3046/*
3047 * Upcall from the adapter indicating data has been transmitted.
3048 * For us it's just the single MPA request or reply.  We can now free
3049 * the skb holding the MPA message.
3050 */
3051static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
3052{
3053        struct c4iw_ep *ep;
3054        struct cpl_fw4_ack *hdr = cplhdr(skb);
3055        u8 credits = hdr->credits;
3056        unsigned int tid = GET_TID(hdr);
3057
3058
3059        ep = get_ep_from_tid(dev, tid);
3060        if (!ep)
3061                return 0;
3062        pr_debug("ep %p tid %u credits %u\n",
3063                 ep, ep->hwtid, credits);
3064        if (credits == 0) {
3065                pr_debug("0 credit ack ep %p tid %u state %u\n",
3066                         ep, ep->hwtid, state_read(&ep->com));
3067                goto out;
3068        }
3069
3070        dst_confirm(ep->dst);
3071        if (ep->mpa_skb) {
3072                pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
3073                         ep, ep->hwtid, state_read(&ep->com),
3074                         ep->mpa_attr.initiator ? 1 : 0);
3075                mutex_lock(&ep->com.mutex);
3076                kfree_skb(ep->mpa_skb);
3077                ep->mpa_skb = NULL;
3078                if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3079                        stop_ep_timer(ep);
3080                mutex_unlock(&ep->com.mutex);
3081        }
3082out:
3083        c4iw_put_ep(&ep->com);
3084        return 0;
3085}
3086
3087int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3088{
3089        int abort;
3090        struct c4iw_ep *ep = to_ep(cm_id);
3091
3092        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3093
3094        mutex_lock(&ep->com.mutex);
3095        if (ep->com.state != MPA_REQ_RCVD) {
3096                mutex_unlock(&ep->com.mutex);
3097                c4iw_put_ep(&ep->com);
3098                return -ECONNRESET;
3099        }
3100        set_bit(ULP_REJECT, &ep->com.history);
3101        if (mpa_rev == 0)
3102                abort = 1;
3103        else
3104                abort = send_mpa_reject(ep, pdata, pdata_len);
3105        mutex_unlock(&ep->com.mutex);
3106
3107        stop_ep_timer(ep);
3108        c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
3109        c4iw_put_ep(&ep->com);
3110        return 0;
3111}
3112
3113int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3114{
3115        int err;
3116        struct c4iw_qp_attributes attrs;
3117        enum c4iw_qp_attr_mask mask;
3118        struct c4iw_ep *ep = to_ep(cm_id);
3119        struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
3120        struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
3121        int abort = 0;
3122
3123        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3124
3125        mutex_lock(&ep->com.mutex);
3126        if (ep->com.state != MPA_REQ_RCVD) {
3127                err = -ECONNRESET;
3128                goto err_out;
3129        }
3130
3131        if (!qp) {
3132                err = -EINVAL;
3133                goto err_out;
3134        }
3135
3136        set_bit(ULP_ACCEPT, &ep->com.history);
3137        if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
3138            (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
3139                err = -EINVAL;
3140                goto err_abort;
3141        }
3142
3143        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
3144                if (conn_param->ord > ep->ird) {
3145                        if (RELAXED_IRD_NEGOTIATION) {
3146                                conn_param->ord = ep->ird;
3147                        } else {
3148                                ep->ird = conn_param->ird;
3149                                ep->ord = conn_param->ord;
3150                                send_mpa_reject(ep, conn_param->private_data,
3151                                                conn_param->private_data_len);
3152                                err = -ENOMEM;
3153                                goto err_abort;
3154                        }
3155                }
3156                if (conn_param->ird < ep->ord) {
3157                        if (RELAXED_IRD_NEGOTIATION &&
3158                            ep->ord <= h->rdev.lldi.max_ordird_qp) {
3159                                conn_param->ird = ep->ord;
3160                        } else {
3161                                err = -ENOMEM;
3162                                goto err_abort;
3163                        }
3164                }
3165        }
3166        ep->ird = conn_param->ird;
3167        ep->ord = conn_param->ord;
3168
3169        if (ep->mpa_attr.version == 1) {
3170                if (peer2peer && ep->ird == 0)
3171                        ep->ird = 1;
3172        } else {
3173                if (peer2peer &&
3174                    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
3175                    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
3176                        ep->ird = 1;
3177        }
3178
3179        pr_debug("ird %d ord %d\n", ep->ird, ep->ord);
3180
3181        ep->com.cm_id = cm_id;
3182        ref_cm_id(&ep->com);
3183        ep->com.qp = qp;
3184        ref_qp(ep);
3185
3186        /* bind QP to EP and move to RTS */
3187        attrs.mpa_attr = ep->mpa_attr;
3188        attrs.max_ird = ep->ird;
3189        attrs.max_ord = ep->ord;
3190        attrs.llp_stream_handle = ep;
3191        attrs.next_state = C4IW_QP_STATE_RTS;
3192
3193        /* bind QP and TID with INIT_WR */
3194        mask = C4IW_QP_ATTR_NEXT_STATE |
3195                             C4IW_QP_ATTR_LLP_STREAM_HANDLE |
3196                             C4IW_QP_ATTR_MPA_ATTR |
3197                             C4IW_QP_ATTR_MAX_IRD |
3198                             C4IW_QP_ATTR_MAX_ORD;
3199
3200        err = c4iw_modify_qp(ep->com.qp->rhp,
3201                             ep->com.qp, mask, &attrs, 1);
3202        if (err)
3203                goto err_deref_cm_id;
3204
3205        set_bit(STOP_MPA_TIMER, &ep->com.flags);
3206        err = send_mpa_reply(ep, conn_param->private_data,
3207                             conn_param->private_data_len);
3208        if (err)
3209                goto err_deref_cm_id;
3210
3211        __state_set(&ep->com, FPDU_MODE);
3212        established_upcall(ep);
3213        mutex_unlock(&ep->com.mutex);
3214        c4iw_put_ep(&ep->com);
3215        return 0;
3216err_deref_cm_id:
3217        deref_cm_id(&ep->com);
3218err_abort:
3219        abort = 1;
3220err_out:
3221        mutex_unlock(&ep->com.mutex);
3222        if (abort)
3223                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3224        c4iw_put_ep(&ep->com);
3225        return err;
3226}
3227
3228static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3229{
3230        struct in_device *ind;
3231        int found = 0;
3232        struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3233        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3234        const struct in_ifaddr *ifa;
3235
3236        ind = in_dev_get(dev->rdev.lldi.ports[0]);
3237        if (!ind)
3238                return -EADDRNOTAVAIL;
3239        rcu_read_lock();
3240        in_dev_for_each_ifa_rcu(ifa, ind) {
3241                if (ifa->ifa_flags & IFA_F_SECONDARY)
3242                        continue;
3243                laddr->sin_addr.s_addr = ifa->ifa_address;
3244                raddr->sin_addr.s_addr = ifa->ifa_address;
3245                found = 1;
3246                break;
3247        }
3248        rcu_read_unlock();
3249
3250        in_dev_put(ind);
3251        return found ? 0 : -EADDRNOTAVAIL;
3252}
3253
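/*
 * Return in @addr the first link-local IPv6 address configured on @dev
 * that does not carry any of @banned_flags (e.g. IFA_F_TENTATIVE), or
 * -EADDRNOTAVAIL if no such address exists.
 */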
3254static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
3255                      unsigned char banned_flags)
3256{
3257        struct inet6_dev *idev;
3258        int err = -EADDRNOTAVAIL;
3259
3260        rcu_read_lock();
3261        idev = __in6_dev_get(dev);
3262        if (idev != NULL) {
3263                struct inet6_ifaddr *ifp;
3264
3265                read_lock_bh(&idev->lock);
3266                list_for_each_entry(ifp, &idev->addr_list, if_list) {
3267                        if (ifp->scope == IFA_LINK &&
3268                            !(ifp->flags & banned_flags)) {
3269                                memcpy(addr, &ifp->addr, 16);
3270                                err = 0;
3271                                break;
3272                        }
3273                }
3274                read_unlock_bh(&idev->lock);
3275        }
3276        rcu_read_unlock();
3277        return err;
3278}
3279
3280static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3281{
3282        struct in6_addr uninitialized_var(addr);
3283        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3284        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3285
3286        if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3287                memcpy(la6->sin6_addr.s6_addr, &addr, 16);
3288                memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
3289                return 0;
3290        }
3291        return -EADDRNOTAVAIL;
3292}
3293
3294int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3295{
3296        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3297        struct c4iw_ep *ep;
3298        int err = 0;
3299        struct sockaddr_in *laddr;
3300        struct sockaddr_in *raddr;
3301        struct sockaddr_in6 *laddr6;
3302        struct sockaddr_in6 *raddr6;
3303        __u8 *ra;
3304        int iptype;
3305
3306        if ((conn_param->ord > cur_max_read_depth(dev)) ||
3307            (conn_param->ird > cur_max_read_depth(dev))) {
3308                err = -EINVAL;
3309                goto out;
3310        }
3311        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3312        if (!ep) {
3313                pr_err("%s - cannot alloc ep\n", __func__);
3314                err = -ENOMEM;
3315                goto out;
3316        }
3317
3318        skb_queue_head_init(&ep->com.ep_skb_list);
3319        if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
3320                err = -ENOMEM;
3321                goto fail1;
3322        }
3323
3324        timer_setup(&ep->timer, ep_timeout, 0);
3325        ep->plen = conn_param->private_data_len;
3326        if (ep->plen)
3327                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3328                       conn_param->private_data, ep->plen);
3329        ep->ird = conn_param->ird;
3330        ep->ord = conn_param->ord;
3331
3332        if (peer2peer && ep->ord == 0)
3333                ep->ord = 1;
3334
3335        ep->com.cm_id = cm_id;
3336        ref_cm_id(&ep->com);
3337        cm_id->provider_data = ep;
3338        ep->com.dev = dev;
3339        ep->com.qp = get_qhp(dev, conn_param->qpn);
3340        if (!ep->com.qp) {
3341                pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3342                err = -EINVAL;
3343                goto fail2;
3344        }
3345        ref_qp(ep);
3346        pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn,
3347                 ep->com.qp, cm_id);
3348
3349        /*
3350         * Allocate an active TID to initiate a TCP connection.
3351         */
3352        ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3353        if (ep->atid == -1) {
3354                pr_err("%s - cannot alloc atid\n", __func__);
3355                err = -ENOMEM;
3356                goto fail2;
3357        }
3358        err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
3359        if (err)
3360                goto fail5;
3361
3362        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3363               sizeof(ep->com.local_addr));
3364        memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3365               sizeof(ep->com.remote_addr));
3366
3367        laddr = (struct sockaddr_in *)&ep->com.local_addr;
3368        raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3369        laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3370        raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3371
3372        if (cm_id->m_remote_addr.ss_family == AF_INET) {
3373                iptype = 4;
3374                ra = (__u8 *)&raddr->sin_addr;
3375
3376                /*
3377                 * Handle loopback requests to INADDR_ANY.
3378                 */
3379                if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3380                        err = pick_local_ipaddrs(dev, cm_id);
3381                        if (err)
3382                                goto fail2;
3383                }
3384
3385                /* find a route */
3386                pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3387                         &laddr->sin_addr, ntohs(laddr->sin_port),
3388                         ra, ntohs(raddr->sin_port));
3389                ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
3390                                          laddr->sin_addr.s_addr,
3391                                          raddr->sin_addr.s_addr,
3392                                          laddr->sin_port,
3393                                          raddr->sin_port, cm_id->tos);
3394        } else {
3395                iptype = 6;
3396                ra = (__u8 *)&raddr6->sin6_addr;
3397
3398                /*
3399                 * Handle loopback requests to INADDR_ANY.
3400                 */
3401                if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3402                        err = pick_local_ip6addrs(dev, cm_id);
3403                        if (err)
3404                                goto fail2;
3405                }
3406
3407                /* find a route */
3408                pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3409                         laddr6->sin6_addr.s6_addr,
3410                         ntohs(laddr6->sin6_port),
3411                         raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3412                ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
3413                                           laddr6->sin6_addr.s6_addr,
3414                                           raddr6->sin6_addr.s6_addr,
3415                                           laddr6->sin6_port,
3416                                           raddr6->sin6_port, cm_id->tos,
3417                                           raddr6->sin6_scope_id);
3418        }
3419        if (!ep->dst) {
3420                pr_err("%s - cannot find route\n", __func__);
3421                err = -EHOSTUNREACH;
3422                goto fail3;
3423        }
3424
3425        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3426                        ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3427        if (err) {
3428                pr_err("%s - cannot alloc l2e\n", __func__);
3429                goto fail4;
3430        }
3431
3432        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3433                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3434                 ep->l2t->idx);
3435
3436        state_set(&ep->com, CONNECTING);
3437        ep->tos = cm_id->tos;
3438
3439        /* send connect request to rnic */
3440        err = send_connect(ep);
3441        if (!err)
3442                goto out;
3443
3444        cxgb4_l2t_release(ep->l2t);
3445fail4:
3446        dst_release(ep->dst);
3447fail3:
3448        xa_erase_irq(&ep->com.dev->atids, ep->atid);
3449fail5:
3450        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3451fail2:
3452        skb_queue_purge(&ep->com.ep_skb_list);
3453        deref_cm_id(&ep->com);
3454fail1:
3455        c4iw_put_ep(&ep->com);
3456out:
3457        return err;
3458}
3459
3460static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3461{
3462        int err;
3463        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3464                                    &ep->com.local_addr;
3465
3466        if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3467                err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3468                                     (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3469                if (err)
3470                        return err;
3471        }
3472        c4iw_init_wr_wait(ep->com.wr_waitp);
3473        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3474                                   ep->stid, &sin6->sin6_addr,
3475                                   sin6->sin6_port,
3476                                   ep->com.dev->rdev.lldi.rxq_ids[0]);
3477        if (!err)
3478                err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3479                                          ep->com.wr_waitp,
3480                                          0, 0, __func__);
3481        else if (err > 0)
3482                err = net_xmit_errno(err);
3483        if (err) {
3484                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3485                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3486                pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3487                       err, ep->stid,
3488                       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3489        }
3490        return err;
3491}
3492
3493static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3494{
3495        int err;
3496        struct sockaddr_in *sin = (struct sockaddr_in *)
3497                                  &ep->com.local_addr;
3498
3499        if (dev->rdev.lldi.enable_fw_ofld_conn) {
3500                do {
3501                        err = cxgb4_create_server_filter(
3502                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3503                                sin->sin_addr.s_addr, sin->sin_port, 0,
3504                                ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3505                        if (err == -EBUSY) {
3506                                if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3507                                        err = -EIO;
3508                                        break;
3509                                }
3510                                set_current_state(TASK_UNINTERRUPTIBLE);
3511                                schedule_timeout(usecs_to_jiffies(100));
3512                        }
3513                } while (err == -EBUSY);
3514        } else {
3515                c4iw_init_wr_wait(ep->com.wr_waitp);
3516                err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3517                                ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3518                                0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3519                if (!err)
3520                        err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3521                                                  ep->com.wr_waitp,
3522                                                  0, 0, __func__);
3523                else if (err > 0)
3524                        err = net_xmit_errno(err);
3525        }
3526        if (err)
3527                pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
3528                       , err, ep->stid,
3529                       &sin->sin_addr, ntohs(sin->sin_port));
3530        return err;
3531}
3532
3533int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3534{
3535        int err = 0;
3536        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3537        struct c4iw_listen_ep *ep;
3538
3539        might_sleep();
3540
3541        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3542        if (!ep) {
3543                pr_err("%s - cannot alloc ep\n", __func__);
3544                err = -ENOMEM;
3545                goto fail1;
3546        }
3547        skb_queue_head_init(&ep->com.ep_skb_list);
3548        pr_debug("ep %p\n", ep);
3549        ep->com.cm_id = cm_id;
3550        ref_cm_id(&ep->com);
3551        ep->com.dev = dev;
3552        ep->backlog = backlog;
3553        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3554               sizeof(ep->com.local_addr));
3555
3556        /*
3557         * Allocate a server TID.
3558         */
3559        if (dev->rdev.lldi.enable_fw_ofld_conn &&
3560            ep->com.local_addr.ss_family == AF_INET)
3561                ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3562                                             cm_id->m_local_addr.ss_family, ep);
3563        else
3564                ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3565                                            cm_id->m_local_addr.ss_family, ep);
3566
3567        if (ep->stid == -1) {
3568                pr_err("%s - cannot alloc stid\n", __func__);
3569                err = -ENOMEM;
3570                goto fail2;
3571        }
3572        err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
3573        if (err)
3574                goto fail3;
3575
3576        state_set(&ep->com, LISTEN);
3577        if (ep->com.local_addr.ss_family == AF_INET)
3578                err = create_server4(dev, ep);
3579        else
3580                err = create_server6(dev, ep);
3581        if (!err) {
3582                cm_id->provider_data = ep;
3583                goto out;
3584        }
3585        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3586fail3:
3587        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3588                        ep->com.local_addr.ss_family);
3589fail2:
3590        deref_cm_id(&ep->com);
3591        c4iw_put_ep(&ep->com);
3592fail1:
3593out:
3594        return err;
3595}
3596
3597int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3598{
3599        int err;
3600        struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3601
3602        pr_debug("ep %p\n", ep);
3603
3604        might_sleep();
3605        state_set(&ep->com, DEAD);
3606        if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3607            ep->com.local_addr.ss_family == AF_INET) {
3608                err = cxgb4_remove_server_filter(
3609                        ep->com.dev->rdev.lldi.ports[0], ep->stid,
3610                        ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3611        } else {
3612                struct sockaddr_in6 *sin6;
3613                c4iw_init_wr_wait(ep->com.wr_waitp);
3614                err = cxgb4_remove_server(
3615                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3616                                ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3617                if (err)
3618                        goto done;
3619                err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
3620                                          0, 0, __func__);
3621                sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3622                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3623                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3624        }
3625        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3626        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3627                        ep->com.local_addr.ss_family);
3628done:
3629        deref_cm_id(&ep->com);
3630        c4iw_put_ep(&ep->com);
3631        return err;
3632}
3633
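/*
 * Initiate an orderly close (abrupt == 0) or an abort (abrupt != 0) of
 * the connection, driven by the current endpoint state.  On a fatal
 * adapter error or a failed close/abort send the endpoint resources are
 * released here as well.  Returns the status of the close/abort send.
 */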
3634int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3635{
3636        int ret = 0;
3637        int close = 0;
3638        int fatal = 0;
3639        struct c4iw_rdev *rdev;
3640
3641        mutex_lock(&ep->com.mutex);
3642
3643        pr_debug("ep %p state %s, abrupt %d\n", ep,
3644                 states[ep->com.state], abrupt);
3645
3646        /*
3647         * Ref the ep here in case we have fatal errors causing the
3648         * ep to be released and freed.
3649         */
3650        c4iw_get_ep(&ep->com);
3651
3652        rdev = &ep->com.dev->rdev;
3653        if (c4iw_fatal_error(rdev)) {
3654                fatal = 1;
3655                close_complete_upcall(ep, -EIO);
3656                ep->com.state = DEAD;
3657        }
3658        switch (ep->com.state) {
3659        case MPA_REQ_WAIT:
3660        case MPA_REQ_SENT:
3661        case MPA_REQ_RCVD:
3662        case MPA_REP_SENT:
3663        case FPDU_MODE:
3664        case CONNECTING:
3665                close = 1;
3666                if (abrupt)
3667                        ep->com.state = ABORTING;
3668                else {
3669                        ep->com.state = CLOSING;
3670
3671                        /*
3672                         * if we close before we see the fw4_ack() then we fix
3673                         * up the timer state since we're reusing it.
3674                         */
3675                        if (ep->mpa_skb &&
3676                            test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
3677                                clear_bit(STOP_MPA_TIMER, &ep->com.flags);
3678                                stop_ep_timer(ep);
3679                        }
3680                        start_ep_timer(ep);
3681                }
3682                set_bit(CLOSE_SENT, &ep->com.flags);
3683                break;
3684        case CLOSING:
3685                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3686                        close = 1;
3687                        if (abrupt) {
3688                                (void)stop_ep_timer(ep);
3689                                ep->com.state = ABORTING;
3690                        } else
3691                                ep->com.state = MORIBUND;
3692                }
3693                break;
3694        case MORIBUND:
3695        case ABORTING:
3696        case DEAD:
3697                pr_debug("ignoring disconnect ep %p state %u\n",
3698                         ep, ep->com.state);
3699                break;
3700        default:
3701                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3702                break;
3703        }
3704
3705        if (close) {
3706                if (abrupt) {
3707                        set_bit(EP_DISC_ABORT, &ep->com.history);
3708                        ret = send_abort(ep);
3709                } else {
3710                        set_bit(EP_DISC_CLOSE, &ep->com.history);
3711                        ret = send_halfclose(ep);
3712                }
3713                if (ret) {
3714                        set_bit(EP_DISC_FAIL, &ep->com.history);
3715                        if (!abrupt) {
3716                                stop_ep_timer(ep);
3717                                close_complete_upcall(ep, -EIO);
3718                        }
3719                        if (ep->com.qp) {
3720                                struct c4iw_qp_attributes attrs;
3721
3722                                attrs.next_state = C4IW_QP_STATE_ERROR;
3723                                ret = c4iw_modify_qp(ep->com.qp->rhp,
3724                                                     ep->com.qp,
3725                                                     C4IW_QP_ATTR_NEXT_STATE,
3726                                                     &attrs, 1);
3727                                if (ret)
3728                                        pr_err("%s - qp <- error failed!\n",
3729                                               __func__);
3730                        }
3731                        fatal = 1;
3732                }
3733        }
3734        mutex_unlock(&ep->com.mutex);
3735        c4iw_put_ep(&ep->com);
3736        if (fatal)
3737                release_ep_resources(ep);
3738        return ret;
3739}
3740
3741static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3742                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3743{
3744        struct c4iw_ep *ep;
3745        int atid = be32_to_cpu(req->tid);
3746
3747        ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3748                                           (__force u32) req->tid);
3749        if (!ep)
3750                return;
3751
3752        switch (req->retval) {
3753        case FW_ENOMEM:
3754                set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3755                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3756                        send_fw_act_open_req(ep, atid);
3757                        return;
3758                }
3759                /* fall through */
3760        case FW_EADDRINUSE:
3761                set_bit(ACT_RETRY_INUSE, &ep->com.history);
3762                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3763                        send_fw_act_open_req(ep, atid);
3764                        return;
3765                }
3766                break;
3767        default:
3768                pr_info("%s unexpected ofld conn wr retval %d\n",
3769                       __func__, req->retval);
3770                break;
3771        }
3772        pr_err("active ofld_connect_wr failure %d atid %d\n",
3773               req->retval, atid);
3774        mutex_lock(&dev->rdev.stats.lock);
3775        dev->rdev.stats.act_ofld_conn_fails++;
3776        mutex_unlock(&dev->rdev.stats.lock);
3777        connect_reply_upcall(ep, status2errno(req->retval));
3778        state_set(&ep->com, DEAD);
3779        if (ep->com.remote_addr.ss_family == AF_INET6) {
3780                struct sockaddr_in6 *sin6 =
3781                        (struct sockaddr_in6 *)&ep->com.local_addr;
3782                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3783                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3784        }
3785        xa_erase_irq(&dev->atids, atid);
3786        cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3787        dst_release(ep->dst);
3788        cxgb4_l2t_release(ep->l2t);
3789        c4iw_put_ep(&ep->com);
3790}
3791
3792static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3793                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3794{
3795        struct sk_buff *rpl_skb;
3796        struct cpl_pass_accept_req *cpl;
3797        int ret;
3798
3799        rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3800        if (req->retval) {
3801                pr_err("%s passive open failure %d\n", __func__, req->retval);
3802                mutex_lock(&dev->rdev.stats.lock);
3803                dev->rdev.stats.pas_ofld_conn_fails++;
3804                mutex_unlock(&dev->rdev.stats.lock);
3805                kfree_skb(rpl_skb);
3806        } else {
3807                cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3808                OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3809                                        (__force u32) htonl(
3810                                        (__force u32) req->tid)));
3811                ret = pass_accept_req(dev, rpl_skb);
3812                if (!ret)
3813                        kfree_skb(rpl_skb);
3814        }
3815        return;
3816}
3817
3818static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
3819{
3820        u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
3821        u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
3822        u64 t;
3823        u32 shift = 32;
3824
3825        t = (thi << shift) | (tlo >> shift);
3826
3827        return t;
3828}
3829
3830static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
3831{
3832        u32 v;
3833        u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
3834
3835        if (word & 0x1)
3836                shift += 32;
3837        v = (t >> shift) & mask;
3838        return v;
3839}
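/*
 * A short worked example of the TCB field extraction above (the word,
 * mask and shift values here are made up purely for illustration; the
 * real ones come from the TCB_*_W/_M/_S macros in t4_tcb.h):
 *
 *   32-bit TCB word 7 lives in 64-bit chunk (31 - 7) / 2 = 12, and
 *   because the word number is odd the shift is bumped by 32.  A field
 *   described by (word = 7, mask = 0xff, shift = 8) is thus recovered as
 *
 *       v = (be64_to_cpu(tcb[12]) >> (8 + 32)) & 0xff;
 *
 *   t4_tcb_get_field64() handles a value that straddles two 32-bit
 *   words: the low 32 bits of the preceding chunk become the upper half
 *   of the result and the high 32 bits of tcb[(31 - word) / 2] become
 *   the lower half.
 */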
3840
3841static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
3842{
3843        struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
3844        __be64 *tcb = (__be64 *)(rpl + 1);
3845        unsigned int tid = GET_TID(rpl);
3846        struct c4iw_ep *ep;
3847        u64 t_flags_64;
3848        u32 rx_pdu_out;
3849
3850        ep = get_ep_from_tid(dev, tid);
3851        if (!ep)
3852                return 0;
3853        /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
3854         * determine if there's a rx PDU feedback event pending.
3855         *
3856         * If that bit is set, it means we'll need to re-read the TCB's
3857         * rq_start value. The final value is the one present in a TCB
3858         * with the TF_RX_PDU_OUT bit cleared.
3859         */
3860
3861        t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
3862        rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
3863
3864        c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
3865        c4iw_put_ep(&ep->com); /* from read_tcb() */
3866
3867        /* If TF_RX_PDU_OUT bit is set, re-read the TCB */
3868        if (rx_pdu_out) {
3869                if (++ep->rx_pdu_out_cnt >= 2) {
3870                        WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
3871                        goto cleanup;
3872                }
3873                read_tcb(ep);
3874                return 0;
3875        }
3876
3877        ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
3878                        TCB_RQ_START_S);
3879cleanup:
3880        pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
3881
3882        if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
3883                finish_peer_abort(dev, ep);
3884        else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
3885                send_abort_req(ep);
3886        else
3887                WARN_ONCE(1, "unexpected state!");
3888
3889        return 0;
3890}
3891
3892static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3893{
3894        struct cpl_fw6_msg *rpl = cplhdr(skb);
3895        struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3896
3897        switch (rpl->type) {
3898        case FW6_TYPE_CQE:
3899                c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3900                break;
3901        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3902                req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3903                switch (req->t_state) {
3904                case TCP_SYN_SENT:
3905                        active_ofld_conn_reply(dev, skb, req);
3906                        break;
3907                case TCP_SYN_RECV:
3908                        passive_ofld_conn_reply(dev, skb, req);
3909                        break;
3910                default:
3911                        pr_err("%s unexpected ofld conn wr state %d\n",
3912                               __func__, req->t_state);
3913                        break;
3914                }
3915                break;
3916        }
3917        return 0;
3918}
3919
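/*
 * Rebuild a cpl_pass_accept_req in place over the received cpl_rx_pkt:
 * the TCP options are re-parsed from the SYN and the header lengths,
 * VLAN tag, TOS and stid are re-encoded so that the skb can later be
 * fed through the regular pass_accept_req() path (see rx_pkt() and
 * passive_ofld_conn_reply()).
 */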
3920static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
3921{
3922        __be32 l2info;
3923        __be16 hdr_len, vlantag, len;
3924        u16 eth_hdr_len;
3925        int tcp_hdr_len, ip_hdr_len;
3926        u8 intf;
3927        struct cpl_rx_pkt *cpl = cplhdr(skb);
3928        struct cpl_pass_accept_req *req;
3929        struct tcp_options_received tmp_opt;
3930        struct c4iw_dev *dev;
3931        enum chip_type type;
3932
3933        dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3934        /* Store values from cpl_rx_pkt in temporary location. */
3935        vlantag = cpl->vlan;
3936        len = cpl->len;
3937        l2info  = cpl->l2info;
3938        hdr_len = cpl->hdr_len;
3939        intf = cpl->iff;
3940
3941        __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3942
3943        /*
3944         * We need to parse the TCP options from the SYN packet
3945         * in order to generate the cpl_pass_accept_req.
3946         */
3947        memset(&tmp_opt, 0, sizeof(tmp_opt));
3948        tcp_clear_options(&tmp_opt);
3949        tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
3950
3951        req = __skb_push(skb, sizeof(*req));
3952        memset(req, 0, sizeof(*req));
3953        req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3954                         SYN_MAC_IDX_V(RX_MACIDX_G(
3955                         be32_to_cpu(l2info))) |
3956                         SYN_XACT_MATCH_F);
3957        type = dev->rdev.lldi.adapter_type;
3958        tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3959        ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3960        req->hdr_len =
3961                cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3962        if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3963                eth_hdr_len = is_t4(type) ?
3964                                RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3965                                RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3966                req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3967                                            IP_HDR_LEN_V(ip_hdr_len) |
3968                                            ETH_HDR_LEN_V(eth_hdr_len));
3969        } else { /* T6 and later */
3970                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3971                req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3972                                            T6_IP_HDR_LEN_V(ip_hdr_len) |
3973                                            T6_ETH_HDR_LEN_V(eth_hdr_len));
3974        }
3975        req->vlan = vlantag;
3976        req->len = len;
3977        req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3978                                    PASS_OPEN_TOS_V(tos));
3979        req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3980        if (tmp_opt.wscale_ok)
3981                req->tcpopt.wsf = tmp_opt.snd_wscale;
3982        req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3983        if (tmp_opt.sack_ok)
3984                req->tcpopt.sack = 1;
3985        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3986        return;
3987}
3988
3989static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
3990                                  __be32 laddr, __be16 lport,
3991                                  __be32 raddr, __be16 rport,
3992                                  u32 rcv_isn, u32 filter, u16 window,
3993                                  u32 rss_qid, u8 port_id)
3994{
3995        struct sk_buff *req_skb;
3996        struct fw_ofld_connection_wr *req;
3997        struct cpl_pass_accept_req *cpl = cplhdr(skb);
3998        int ret;
3999
4000        req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
4001        if (!req_skb)
4002                return;
4003        req = __skb_put_zero(req_skb, sizeof(*req));
4004        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
4005        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
4006        req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
4007        req->le.filter = (__force __be32) filter;
4008        req->le.lport = lport;
4009        req->le.pport = rport;
4010        req->le.u.ipv4.lip = laddr;
4011        req->le.u.ipv4.pip = raddr;
4012        req->tcb.rcv_nxt = htonl(rcv_isn + 1);
4013        req->tcb.rcv_adv = htons(window);
4014        req->tcb.t_state_to_astid =
4015                 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
4016                        FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
4017                        FW_OFLD_CONNECTION_WR_ASTID_V(
4018                        PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));
4019
4020        /*
4021         * We store the qid in opt2 which will be used by the firmware
4022         * to send us the wr response.
4023         */
4024        req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
4025
4026        /*
4027         * We initialize the MSS index in the TCB to 0xF so that
4028         * when the driver sends the cpl_pass_accept_rpl, the TCB
4029         * picks up the correct value. If this were 0, TP would
4030         * ignore any value > 0 for the MSS index.
4031         */
4032        req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
4033        req->cookie = (uintptr_t)skb;
4034
4035        set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
4036        ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
4037        if (ret < 0) {
4038                pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
4039                       ret);
4040                kfree_skb(skb);
4041                kfree_skb(req_skb);
4042        }
4043}
4044
4045/*
4046 * Handler for the CPL_RX_PKT message. We need to handle cpl_rx_pkt
4047 * messages when a filter is being used instead of a server to
4048 * redirect a SYN packet. When packets hit the filter they are
4049 * redirected to the offload queue and the driver tries to establish
4050 * the connection using a firmware work request.
4051 */
4052static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
4053{
4054        int stid;
4055        unsigned int filter;
4056        struct ethhdr *eh = NULL;
4057        struct vlan_ethhdr *vlan_eh = NULL;
4058        struct iphdr *iph;
4059        struct tcphdr *tcph;
4060        struct rss_header *rss = (void *)skb->data;
4061        struct cpl_rx_pkt *cpl = (void *)skb->data;
4062        struct cpl_pass_accept_req *req = (void *)(rss + 1);
4063        struct l2t_entry *e;
4064        struct dst_entry *dst;
4065        struct c4iw_ep *lep = NULL;
4066        u16 window;
4067        struct port_info *pi;
4068        struct net_device *pdev;
4069        u16 rss_qid, eth_hdr_len;
4070        int step;
4071        struct neighbour *neigh;
4072
4073        /* Drop all non-SYN packets */
4074        if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
4075                goto reject;
4076
4077        /*
4078         * Drop all packets which did not hit the filter.
4079         * Unlikely to happen.
4080         */
4081        if (!(rss->filter_hit && rss->filter_tid))
4082                goto reject;
4083
4084        /*
4085         * Calculate the server tid from the filter hit index in the cpl_rx_pkt.
4086         */
4087        stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
4088
4089        lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
4090        if (!lep) {
4091                pr_warn("%s connect request on invalid stid %d\n",
4092                        __func__, stid);
4093                goto reject;
4094        }
4095
4096        switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
4097        case CHELSIO_T4:
4098                eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4099                break;
4100        case CHELSIO_T5:
4101                eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4102                break;
4103        case CHELSIO_T6:
4104                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4105                break;
4106        default:
4107                pr_err("T%d Chip is not supported\n",
4108                       CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
4109                goto reject;
4110        }
4111
4112        if (eth_hdr_len == ETH_HLEN) {
4113                eh = (struct ethhdr *)(req + 1);
4114                iph = (struct iphdr *)(eh + 1);
4115        } else {
4116                vlan_eh = (struct vlan_ethhdr *)(req + 1);
4117                iph = (struct iphdr *)(vlan_eh + 1);
4118                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
4119        }
4120
4121        if (iph->version != 0x4)
4122                goto reject;
4123
4124        tcph = (struct tcphdr *)(iph + 1);
4125        skb_set_network_header(skb, (void *)iph - (void *)rss);
4126        skb_set_transport_header(skb, (void *)tcph - (void *)rss);
4127        skb_get(skb);
4128
4129        pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n",
4130                 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
4131                 ntohs(tcph->source), iph->tos);
4132
4133        dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
4134                              iph->daddr, iph->saddr, tcph->dest,
4135                              tcph->source, iph->tos);
4136        if (!dst) {
4137                pr_err("%s - failed to find dst entry!\n", __func__);
4138                goto reject;
4139        }
4140        neigh = dst_neigh_lookup_skb(dst, skb);
4141
4142        if (!neigh) {
4143                pr_err("%s - failed to allocate neigh!\n", __func__);
4144                goto free_dst;
4145        }
4146
4147        if (neigh->dev->flags & IFF_LOOPBACK) {
4148                pdev = ip_dev_find(&init_net, iph->daddr);
4149                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4150                                    pdev, 0);
4151                pi = (struct port_info *)netdev_priv(pdev);
4152                dev_put(pdev);
4153        } else {
4154                pdev = get_real_dev(neigh->dev);
4155                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4156                                        pdev, 0);
4157                pi = (struct port_info *)netdev_priv(pdev);
4158        }
4159        neigh_release(neigh);
4160        if (!e) {
4161                pr_err("%s - failed to allocate l2t entry!\n",
4162                       __func__);
4163                goto free_dst;
4164        }
4165
4166        step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
4167        rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
4168        window = (__force u16) htons((__force u16)tcph->window);
4169
4170        /* Calculate the filter portion for the LE region. */
4171        filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
4172                                                    dev->rdev.lldi.ports[0],
4173                                                    e));
4174
4175        /*
4176         * Synthesize the cpl_pass_accept_req. We have everything except the
4177         * TID. Once firmware sends a reply with TID we update the TID field
4178         * in cpl and pass it through the regular cpl_pass_accept_req path.
4179         */
4180        build_cpl_pass_accept_req(skb, stid, iph->tos);
4181        send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
4182                              tcph->source, ntohl(tcph->seq), filter, window,
4183                              rss_qid, pi->port_id);
4184        cxgb4_l2t_release(e);
4185free_dst:
4186        dst_release(dst);
4187reject:
4188        if (lep)
4189                c4iw_put_ep(&lep->com);
4190        return 0;
4191}
4192
4193/*
4194 * These are the real handlers that are called from a
4195 * work queue.
4196 */
4197static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
4198        [CPL_ACT_ESTABLISH] = act_establish,
4199        [CPL_ACT_OPEN_RPL] = act_open_rpl,
4200        [CPL_RX_DATA] = rx_data,
4201        [CPL_ABORT_RPL_RSS] = abort_rpl,
4202        [CPL_ABORT_RPL] = abort_rpl,
4203        [CPL_PASS_OPEN_RPL] = pass_open_rpl,
4204        [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
4205        [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
4206        [CPL_PASS_ESTABLISH] = pass_establish,
4207        [CPL_PEER_CLOSE] = peer_close,
4208        [CPL_ABORT_REQ_RSS] = peer_abort,
4209        [CPL_CLOSE_CON_RPL] = close_con_rpl,
4210        [CPL_RDMA_TERMINATE] = terminate,
4211        [CPL_FW4_ACK] = fw4_ack,
4212        [CPL_GET_TCB_RPL] = read_tcb_rpl,
4213        [CPL_FW6_MSG] = deferred_fw6_msg,
4214        [CPL_RX_PKT] = rx_pkt,
4215        [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
4216        [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
4217};
4218
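    /*
     * Handle an endpoint whose timer has fired: issue any upcall required
     * by its current state and, unless the ep is already being torn down,
     * abort the connection.
     */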
4219static void process_timeout(struct c4iw_ep *ep)
4220{
4221        struct c4iw_qp_attributes attrs;
4222        int abort = 1;
4223
4224        mutex_lock(&ep->com.mutex);
4225        pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
4226        set_bit(TIMEDOUT, &ep->com.history);
4227        switch (ep->com.state) {
4228        case MPA_REQ_SENT:
4229                connect_reply_upcall(ep, -ETIMEDOUT);
4230                break;
4231        case MPA_REQ_WAIT:
4232        case MPA_REQ_RCVD:
4233        case MPA_REP_SENT:
4234        case FPDU_MODE:
4235                break;
4236        case CLOSING:
4237        case MORIBUND:
4238                if (ep->com.cm_id && ep->com.qp) {
4239                        attrs.next_state = C4IW_QP_STATE_ERROR;
4240                        c4iw_modify_qp(ep->com.qp->rhp,
4241                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
4242                                     &attrs, 1);
4243                }
4244                close_complete_upcall(ep, -ETIMEDOUT);
4245                break;
4246        case ABORTING:
4247        case DEAD:
4248
4249                /*
4250                 * These states are expected if the ep timed out at the same
4251                 * time as another thread was calling stop_ep_timer().
4252                 * So we silently do nothing for these states.
4253                 */
4254                abort = 0;
4255                break;
4256        default:
4257                WARN(1, "%s unexpected state ep %p tid %u state %u\n",
4258                        __func__, ep, ep->hwtid, ep->com.state);
4259                abort = 0;
4260        }
4261        mutex_unlock(&ep->com.mutex);
4262        if (abort)
4263                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
4264        c4iw_put_ep(&ep->com);
4265}
4266
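    /*
     * Drain the global timeout_list, dropping timeout_lock around each
     * call to process_timeout() since it can sleep.
     */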
4267static void process_timedout_eps(void)
4268{
4269        struct c4iw_ep *ep;
4270
4271        spin_lock_irq(&timeout_lock);
4272        while (!list_empty(&timeout_list)) {
4273                struct list_head *tmp;
4274
4275                tmp = timeout_list.next;
4276                list_del(tmp);
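                    /* Clear the pointers so ep_timeout() sees this ep as off the list */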
4277                tmp->next = NULL;
4278                tmp->prev = NULL;
4279                spin_unlock_irq(&timeout_lock);
4280                ep = list_entry(tmp, struct c4iw_ep, entry);
4281                process_timeout(ep);
4282                spin_lock_irq(&timeout_lock);
4283        }
4284        spin_unlock_irq(&timeout_lock);
4285}
4286
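    /*
     * Work queue handler: process any timed-out endpoints, then dequeue
     * received skbs and dispatch them to the handlers in work_handlers[].
     */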
4287static void process_work(struct work_struct *work)
4288{
4289        struct sk_buff *skb = NULL;
4290        struct c4iw_dev *dev;
4291        struct cpl_act_establish *rpl;
4292        unsigned int opcode;
4293        int ret;
4294
4295        process_timedout_eps();
4296        while ((skb = skb_dequeue(&rxq))) {
4297                rpl = cplhdr(skb);
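                    /* sched() stashed the c4iw_dev pointer in skb->cb */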
4298                dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
4299                opcode = rpl->ot.opcode;
4300
4301                if (opcode >= ARRAY_SIZE(work_handlers) ||
4302                    !work_handlers[opcode]) {
4303                        pr_err("No handler for opcode 0x%x.\n", opcode);
4304                        kfree_skb(skb);
4305                } else {
4306                        ret = work_handlers[opcode](dev, skb);
4307                        if (!ret)
4308                                kfree_skb(skb);
4309                }
4310                process_timedout_eps();
4311        }
4312}
4313
4314static DECLARE_WORK(skb_work, process_work);
4315
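    /*
     * Connection timer callback.  Queue the ep on the timeout list (once)
     * and kick the work queue; the real handling happens in
     * process_timedout_eps().
     */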
4316static void ep_timeout(struct timer_list *t)
4317{
4318        struct c4iw_ep *ep = from_timer(ep, t, timer);
4319        int kickit = 0;
4320
4321        spin_lock(&timeout_lock);
4322        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
4323                /*
4324                 * Only insert if it is not already on the list.
4325                 */
4326                if (!ep->entry.next) {
4327                        list_add_tail(&ep->entry, &timeout_list);
4328                        kickit = 1;
4329                }
4330        }
4331        spin_unlock(&timeout_lock);
4332        if (kickit)
4333                queue_work(workq, &skb_work);
4334}
4335
4336/*
4337 * All the CM events are handled on a work queue so that they run in a
     * safe, sleepable context.
4338 */
4339static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
4340{
4341
4342        /*
4343         * Save dev in the skb->cb area.
4344         */
4345        *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
4346
4347        /*
4348         * Queue the skb and schedule the worker thread.
4349         */
4350        skb_queue_tail(&rxq, skb);
4351        queue_work(workq, &skb_work);
4352        return 0;
4353}
4354
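    /*
     * Completion for a SET_TCB request; nothing to do beyond reporting an
     * unexpected status.
     */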
4355static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
4356{
4357        struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
4358
4359        if (rpl->status != CPL_ERR_NONE) {
4360                pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
4361                       rpl->status, GET_TID(rpl));
4362        }
4363        kfree_skb(skb);
4364        return 0;
4365}
4366
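    /*
     * Dispatch FW6 messages: work request completions wake their waiter
     * directly, while CQE and offloaded-connection replies are deferred
     * to the work queue.
     */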
4367static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
4368{
4369        struct cpl_fw6_msg *rpl = cplhdr(skb);
4370        struct c4iw_wr_wait *wr_waitp;
4371        int ret;
4372
4373        pr_debug("type %u\n", rpl->type);
4374
4375        switch (rpl->type) {
4376        case FW6_TYPE_WR_RPL:
4377                ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
4378                wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
4379                pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret);
4380                if (wr_waitp)
4381                        c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0);
4382                kfree_skb(skb);
4383                break;
4384        case FW6_TYPE_CQE:
4385        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
4386                sched(dev, skb);
4387                break;
4388        default:
4389                pr_err("%s unexpected fw6 msg type %u\n",
4390                       __func__, rpl->type);
4391                kfree_skb(skb);
4392                break;
4393        }
4394        return 0;
4395}
4396
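    /*
     * Called directly from the T4 upcall path (not from the work queue):
     * wake up any thread blocked on the ep's wr_wait, unless the abort is
     * only negative advice, then hand the skb to the work queue for the
     * full abort processing.
     */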
4397static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4398{
4399        struct cpl_abort_req_rss *req = cplhdr(skb);
4400        struct c4iw_ep *ep;
4401        unsigned int tid = GET_TID(req);
4402
4403        ep = get_ep_from_tid(dev, tid);
4404        /* This EP will be dereferenced in peer_abort() */
4405        if (!ep) {
4406                pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
4407                kfree_skb(skb);
4408                return 0;
4409        }
4410        if (cxgb_is_neg_adv(req->status)) {
4411                pr_debug("Negative advice on abort - tid %u status %d (%s)\n",
4412                         ep->hwtid, req->status,
4413                         neg_adv_str(req->status));
4414                goto out;
4415        }
4416        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
4417
4418        c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
4419out:
4420        sched(dev, skb);
4421        return 0;
4422}
4423
4424/*
4425 * Most upcalls from the T4 Core go to sched() to
4426 * schedule the processing on a work queue.
4427 */
4428c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
4429        [CPL_ACT_ESTABLISH] = sched,
4430        [CPL_ACT_OPEN_RPL] = sched,
4431        [CPL_RX_DATA] = sched,
4432        [CPL_ABORT_RPL_RSS] = sched,
4433        [CPL_ABORT_RPL] = sched,
4434        [CPL_PASS_OPEN_RPL] = sched,
4435        [CPL_CLOSE_LISTSRV_RPL] = sched,
4436        [CPL_PASS_ACCEPT_REQ] = sched,
4437        [CPL_PASS_ESTABLISH] = sched,
4438        [CPL_PEER_CLOSE] = sched,
4439        [CPL_CLOSE_CON_RPL] = sched,
4440        [CPL_ABORT_REQ_RSS] = peer_abort_intr,
4441        [CPL_RDMA_TERMINATE] = sched,
4442        [CPL_FW4_ACK] = sched,
4443        [CPL_SET_TCB_RPL] = set_tcb_rpl,
4444        [CPL_GET_TCB_RPL] = sched,
4445        [CPL_FW6_MSG] = fw6_msg,
4446        [CPL_RX_PKT] = sched
4447};
4448
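    /*
     * Module init for the CM: set up the timeout lock, the rx queue, and
     * the ordered work queue on which all CM events are processed.
     */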
4449int __init c4iw_cm_init(void)
4450{
4451        spin_lock_init(&timeout_lock);
4452        skb_queue_head_init(&rxq);
4453
4454        workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
4455        if (!workq)
4456                return -ENOMEM;
4457
4458        return 0;
4459}
4460
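    /*
     * Module teardown: by now every endpoint should be gone, so the
     * timeout list must be empty; flush and destroy the work queue.
     */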
4461void c4iw_cm_term(void)
4462{
4463        WARN_ON(!list_empty(&timeout_list));
4464        flush_workqueue(workq);
4465        destroy_workqueue(workq);
4466}
4467