linux/drivers/infiniband/hw/cxgb4/cm.c
   1/*
   2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/list.h>
  34#include <linux/workqueue.h>
  35#include <linux/skbuff.h>
  36#include <linux/timer.h>
  37#include <linux/notifier.h>
  38#include <linux/inetdevice.h>
  39#include <linux/ip.h>
  40#include <linux/tcp.h>
  41#include <linux/if_vlan.h>
  42
  43#include <net/neighbour.h>
  44#include <net/netevent.h>
  45#include <net/route.h>
  46#include <net/tcp.h>
  47#include <net/ip6_route.h>
  48#include <net/addrconf.h>
  49
  50#include <rdma/ib_addr.h>
  51
  52#include <libcxgb_cm.h>
  53#include "iw_cxgb4.h"
  54#include "clip_tbl.h"
  55
  56static char *states[] = {
  57        "idle",
  58        "listen",
  59        "connecting",
  60        "mpa_wait_req",
  61        "mpa_req_sent",
  62        "mpa_req_rcvd",
  63        "mpa_rep_sent",
  64        "fpdu_mode",
  65        "aborting",
  66        "closing",
  67        "moribund",
  68        "dead",
  69        NULL,
  70};
  71
  72static int nocong;
  73module_param(nocong, int, 0644);
   74MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
  75
  76static int enable_ecn;
  77module_param(enable_ecn, int, 0644);
  78MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
  79
  80static int dack_mode;
  81module_param(dack_mode, int, 0644);
  82MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
  83
  84uint c4iw_max_read_depth = 32;
  85module_param(c4iw_max_read_depth, int, 0644);
  86MODULE_PARM_DESC(c4iw_max_read_depth,
  87                 "Per-connection max ORD/IRD (default=32)");
  88
  89static int enable_tcp_timestamps;
  90module_param(enable_tcp_timestamps, int, 0644);
  91MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
  92
  93static int enable_tcp_sack;
  94module_param(enable_tcp_sack, int, 0644);
  95MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
  96
  97static int enable_tcp_window_scaling = 1;
  98module_param(enable_tcp_window_scaling, int, 0644);
  99MODULE_PARM_DESC(enable_tcp_window_scaling,
 100                 "Enable tcp window scaling (default=1)");
 101
 102static int peer2peer = 1;
 103module_param(peer2peer, int, 0644);
 104MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
 105
 106static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 107module_param(p2p_type, int, 0644);
 108MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
 109                           "1=RDMA_READ 0=RDMA_WRITE (default 1)");
 110
 111static int ep_timeout_secs = 60;
 112module_param(ep_timeout_secs, int, 0644);
 113MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
 114                                   "in seconds (default=60)");
 115
 116static int mpa_rev = 2;
 117module_param(mpa_rev, int, 0644);
 118MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
 119                "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
 120                " compliant (default=2)");
 121
 122static int markers_enabled;
 123module_param(markers_enabled, int, 0644);
 124MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
 125
 126static int crc_enabled = 1;
 127module_param(crc_enabled, int, 0644);
 128MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
 129
 130static int rcv_win = 256 * 1024;
 131module_param(rcv_win, int, 0644);
 132MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
 133
 134static int snd_win = 128 * 1024;
 135module_param(snd_win, int, 0644);
 136MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
 137
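/*
 * Usage note (illustrative, not part of the original source): the knobs above
 * are ordinary module parameters, so assuming the driver is built as
 * iw_cxgb4 they could be overridden at load time, e.g.
 *
 *     modprobe iw_cxgb4 mpa_rev=2 peer2peer=1 rcv_win=262144
 *
 * and the 0644 entries remain writable after load under
 * /sys/module/iw_cxgb4/parameters/.
 */
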
 138static struct workqueue_struct *workq;
 139
 140static struct sk_buff_head rxq;
 141
 142static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
 143static void ep_timeout(struct timer_list *t);
 144static void connect_reply_upcall(struct c4iw_ep *ep, int status);
 145static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
 146
 147static LIST_HEAD(timeout_list);
 148static DEFINE_SPINLOCK(timeout_lock);
 149
 150static void deref_cm_id(struct c4iw_ep_common *epc)
 151{
 152        epc->cm_id->rem_ref(epc->cm_id);
 153        epc->cm_id = NULL;
 154        set_bit(CM_ID_DEREFED, &epc->history);
 155}
 156
 157static void ref_cm_id(struct c4iw_ep_common *epc)
 158{
 159        set_bit(CM_ID_REFED, &epc->history);
 160        epc->cm_id->add_ref(epc->cm_id);
 161}
 162
 163static void deref_qp(struct c4iw_ep *ep)
 164{
 165        c4iw_qp_rem_ref(&ep->com.qp->ibqp);
 166        clear_bit(QP_REFERENCED, &ep->com.flags);
 167        set_bit(QP_DEREFED, &ep->com.history);
 168}
 169
 170static void ref_qp(struct c4iw_ep *ep)
 171{
 172        set_bit(QP_REFERENCED, &ep->com.flags);
 173        set_bit(QP_REFED, &ep->com.history);
 174        c4iw_qp_add_ref(&ep->com.qp->ibqp);
 175}
 176
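/*
 * The endpoint timer holds its own reference on the ep: start_ep_timer()
 * takes a ref before arming the timer, and stop_ep_timer() drops it,
 * returning 0 if it released the reference here or 1 if the TIMEOUT flag
 * was already set (the timeout path owns the reference in that case).
 */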
 177static void start_ep_timer(struct c4iw_ep *ep)
 178{
 179        pr_debug("ep %p\n", ep);
 180        if (timer_pending(&ep->timer)) {
 181                pr_err("%s timer already started! ep %p\n",
 182                       __func__, ep);
 183                return;
 184        }
 185        clear_bit(TIMEOUT, &ep->com.flags);
 186        c4iw_get_ep(&ep->com);
 187        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
 188        add_timer(&ep->timer);
 189}
 190
 191static int stop_ep_timer(struct c4iw_ep *ep)
 192{
 193        pr_debug("ep %p stopping\n", ep);
 194        del_timer_sync(&ep->timer);
 195        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
 196                c4iw_put_ep(&ep->com);
 197                return 0;
 198        }
 199        return 1;
 200}
 201
 202static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
 203                  struct l2t_entry *l2e)
 204{
 205        int     error = 0;
 206
 207        if (c4iw_fatal_error(rdev)) {
 208                kfree_skb(skb);
 209                pr_err("%s - device in error state - dropping\n", __func__);
 210                return -EIO;
 211        }
 212        error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
 213        if (error < 0)
 214                kfree_skb(skb);
 215        else if (error == NET_XMIT_DROP)
 216                return -ENOMEM;
 217        return error < 0 ? error : 0;
 218}
 219
 220int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
 221{
 222        int     error = 0;
 223
 224        if (c4iw_fatal_error(rdev)) {
 225                kfree_skb(skb);
 226                pr_err("%s - device in error state - dropping\n", __func__);
 227                return -EIO;
 228        }
 229        error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
 230        if (error < 0)
 231                kfree_skb(skb);
 232        return error < 0 ? error : 0;
 233}
 234
 235static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 236{
 237        u32 len = roundup(sizeof(struct cpl_tid_release), 16);
 238
 239        skb = get_skb(skb, len, GFP_KERNEL);
 240        if (!skb)
 241                return;
 242
 243        cxgb_mk_tid_release(skb, len, hwtid, 0);
 244        c4iw_ofld_send(rdev, skb);
 245        return;
 246}
 247
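/*
 * Derive the effective MSS for this connection from the negotiated MTU
 * index: the hardware MTU minus the IPv4/IPv6 and TCP headers, further
 * reduced by the TCP timestamp option when it was negotiated, with a
 * floor of 128 bytes.
 */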
 248static void set_emss(struct c4iw_ep *ep, u16 opt)
 249{
 250        ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
 251                   ((AF_INET == ep->com.remote_addr.ss_family) ?
 252                    sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
 253                   sizeof(struct tcphdr);
 254        ep->mss = ep->emss;
 255        if (TCPOPT_TSTAMP_G(opt))
 256                ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
 257        if (ep->emss < 128)
 258                ep->emss = 128;
 259        if (ep->emss & 7)
 260                pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
 261                         TCPOPT_MSS_G(opt), ep->mss, ep->emss);
 262        pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
 263                 ep->emss);
 264}
 265
 266static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
 267{
 268        enum c4iw_ep_state state;
 269
 270        mutex_lock(&epc->mutex);
 271        state = epc->state;
 272        mutex_unlock(&epc->mutex);
 273        return state;
 274}
 275
 276static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 277{
 278        epc->state = new;
 279}
 280
 281static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 282{
 283        mutex_lock(&epc->mutex);
 284        pr_debug("%s -> %s\n", states[epc->state], states[new]);
 285        __state_set(epc, new);
 286        mutex_unlock(&epc->mutex);
 287        return;
 288}
 289
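/*
 * Pre-allocate a small pool of skbs on the endpoint, each large enough for
 * the biggest control-plane work request (union cpl_wr_size).  The flowc,
 * half-close and abort paths dequeue from this list so they do not have to
 * allocate memory at teardown time.
 */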
 290static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
 291{
 292        struct sk_buff *skb;
 293        unsigned int i;
 294        size_t len;
 295
 296        len = roundup(sizeof(union cpl_wr_size), 16);
 297        for (i = 0; i < size; i++) {
 298                skb = alloc_skb(len, GFP_KERNEL);
 299                if (!skb)
 300                        goto fail;
 301                skb_queue_tail(ep_skb_list, skb);
 302        }
 303        return 0;
 304fail:
 305        skb_queue_purge(ep_skb_list);
 306        return -ENOMEM;
 307}
 308
 309static void *alloc_ep(int size, gfp_t gfp)
 310{
 311        struct c4iw_ep_common *epc;
 312
 313        epc = kzalloc(size, gfp);
 314        if (epc) {
 315                epc->wr_waitp = c4iw_alloc_wr_wait(gfp);
 316                if (!epc->wr_waitp) {
 317                        kfree(epc);
 318                        epc = NULL;
 319                        goto out;
 320                }
 321                kref_init(&epc->kref);
 322                mutex_init(&epc->mutex);
 323                c4iw_init_wr_wait(epc->wr_waitp);
 324        }
 325        pr_debug("alloc ep %p\n", epc);
 326out:
 327        return epc;
 328}
 329
 330static void remove_ep_tid(struct c4iw_ep *ep)
 331{
 332        unsigned long flags;
 333
 334        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 335        __xa_erase(&ep->com.dev->hwtids, ep->hwtid);
 336        if (xa_empty(&ep->com.dev->hwtids))
 337                wake_up(&ep->com.dev->wait);
 338        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 339}
 340
 341static int insert_ep_tid(struct c4iw_ep *ep)
 342{
 343        unsigned long flags;
 344        int err;
 345
 346        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 347        err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
 348        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 349
 350        return err;
 351}
 352
 353/*
 354 * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
 355 */
 356static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
 357{
 358        struct c4iw_ep *ep;
 359        unsigned long flags;
 360
 361        xa_lock_irqsave(&dev->hwtids, flags);
 362        ep = xa_load(&dev->hwtids, tid);
 363        if (ep)
 364                c4iw_get_ep(&ep->com);
 365        xa_unlock_irqrestore(&dev->hwtids, flags);
 366        return ep;
 367}
 368
 369/*
 370 * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
 371 */
 372static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
 373                                               unsigned int stid)
 374{
 375        struct c4iw_listen_ep *ep;
 376        unsigned long flags;
 377
 378        xa_lock_irqsave(&dev->stids, flags);
 379        ep = xa_load(&dev->stids, stid);
 380        if (ep)
 381                c4iw_get_ep(&ep->com);
 382        xa_unlock_irqrestore(&dev->stids, flags);
 383        return ep;
 384}
 385
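/*
 * Final kref release for an endpoint: drop the QP reference if one is
 * still held, and if resources were marked for release free the CLIP
 * entry (IPv6 only), the hardware TID, the cached route, the L2T entry
 * and any pending MPA skb before freeing the ep itself.
 */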
 386void _c4iw_free_ep(struct kref *kref)
 387{
 388        struct c4iw_ep *ep;
 389
 390        ep = container_of(kref, struct c4iw_ep, com.kref);
 391        pr_debug("ep %p state %s\n", ep, states[ep->com.state]);
 392        if (test_bit(QP_REFERENCED, &ep->com.flags))
 393                deref_qp(ep);
 394        if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 395                if (ep->com.remote_addr.ss_family == AF_INET6) {
 396                        struct sockaddr_in6 *sin6 =
 397                                        (struct sockaddr_in6 *)
 398                                        &ep->com.local_addr;
 399
 400                        cxgb4_clip_release(
 401                                        ep->com.dev->rdev.lldi.ports[0],
 402                                        (const u32 *)&sin6->sin6_addr.s6_addr,
 403                                        1);
 404                }
 405                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
 406                                 ep->com.local_addr.ss_family);
 407                dst_release(ep->dst);
 408                cxgb4_l2t_release(ep->l2t);
 409                kfree_skb(ep->mpa_skb);
 410        }
 411        if (!skb_queue_empty(&ep->com.ep_skb_list))
 412                skb_queue_purge(&ep->com.ep_skb_list);
 413        c4iw_put_wr_wait(ep->com.wr_waitp);
 414        kfree(ep);
 415}
 416
 417static void release_ep_resources(struct c4iw_ep *ep)
 418{
 419        set_bit(RELEASE_RESOURCES, &ep->com.flags);
 420
 421        /*
 422         * If we have a hwtid, then remove it from the idr table
 423         * so lookups will no longer find this endpoint.  Otherwise
 424         * we have a race where one thread finds the ep ptr just
 425         * before the other thread is freeing the ep memory.
 426         */
 427        if (ep->hwtid != -1)
 428                remove_ep_tid(ep);
 429        c4iw_put_ep(&ep->com);
 430}
 431
 432static int status2errno(int status)
 433{
 434        switch (status) {
 435        case CPL_ERR_NONE:
 436                return 0;
 437        case CPL_ERR_CONN_RESET:
 438                return -ECONNRESET;
 439        case CPL_ERR_ARP_MISS:
 440                return -EHOSTUNREACH;
 441        case CPL_ERR_CONN_TIMEDOUT:
 442                return -ETIMEDOUT;
 443        case CPL_ERR_TCAM_FULL:
 444                return -ENOMEM;
 445        case CPL_ERR_CONN_EXIST:
 446                return -EADDRINUSE;
 447        default:
 448                return -EIO;
 449        }
 450}
 451
 452/*
 453 * Try and reuse skbs already allocated...
 454 */
 455static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 456{
 457        if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
 458                skb_trim(skb, 0);
 459                skb_get(skb);
 460                skb_reset_transport_header(skb);
 461        } else {
 462                skb = alloc_skb(len, gfp);
 463                if (!skb)
 464                        return NULL;
 465        }
 466        t4_set_arp_err_handler(skb, NULL, NULL);
 467        return skb;
 468}
 469
 470static struct net_device *get_real_dev(struct net_device *egress_dev)
 471{
 472        return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 473}
 474
 475static void arp_failure_discard(void *handle, struct sk_buff *skb)
 476{
 477        pr_err("ARP failure\n");
 478        kfree_skb(skb);
 479}
 480
 481static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
 482{
 483        pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
 484}
 485
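/*
 * Two "fake" CPL opcodes, placed just past the real opcode space
 * (NUM_CPL_CMDS), used to bounce ARP-failure cleanup out of atomic
 * context and into process_work() - see queue_arp_failure_cpl() below.
 */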
 486enum {
 487        NUM_FAKE_CPLS = 2,
 488        FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
 489        FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
 490};
 491
 492static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 493{
 494        struct c4iw_ep *ep;
 495
 496        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 497        release_ep_resources(ep);
 498        return 0;
 499}
 500
 501static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 502{
 503        struct c4iw_ep *ep;
 504
 505        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 506        c4iw_put_ep(&ep->parent_ep->com);
 507        release_ep_resources(ep);
 508        return 0;
 509}
 510
 511/*
 512 * Fake up a special CPL opcode and call sched() so process_work() will call
 513 * _put_ep_safe() in a safe context to free the ep resources.  This is needed
 514 * because ARP error handlers are called in an ATOMIC context, and
 515 * _c4iw_free_ep() needs to block.
 516 */
 517static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
 518                                  int cpl)
 519{
 520        struct cpl_act_establish *rpl = cplhdr(skb);
 521
 522        /* Set our special ARP_FAILURE opcode */
 523        rpl->ot.opcode = cpl;
 524
 525        /*
 526         * Save ep in the skb->cb area, after where sched() will save the dev
 527         * ptr.
 528         */
 529        *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
 530        sched(ep->com.dev, skb);
 531}
 532
 533/* Handle an ARP failure for an accept */
 534static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
 535{
 536        struct c4iw_ep *ep = handle;
 537
 538        pr_err("ARP failure during accept - tid %u - dropping connection\n",
 539               ep->hwtid);
 540
 541        __state_set(&ep->com, DEAD);
 542        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
 543}
 544
 545/*
 546 * Handle an ARP failure for an active open.
 547 */
 548static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
 549{
 550        struct c4iw_ep *ep = handle;
 551
 552        pr_err("ARP failure during connect\n");
 553        connect_reply_upcall(ep, -EHOSTUNREACH);
 554        __state_set(&ep->com, DEAD);
 555        if (ep->com.remote_addr.ss_family == AF_INET6) {
 556                struct sockaddr_in6 *sin6 =
 557                        (struct sockaddr_in6 *)&ep->com.local_addr;
 558                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 559                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 560        }
 561        xa_erase_irq(&ep->com.dev->atids, ep->atid);
 562        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
 563        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 564}
 565
 566/*
 567 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 568 * and send it along.
 569 */
 570static void abort_arp_failure(void *handle, struct sk_buff *skb)
 571{
 572        int ret;
 573        struct c4iw_ep *ep = handle;
 574        struct c4iw_rdev *rdev = &ep->com.dev->rdev;
 575        struct cpl_abort_req *req = cplhdr(skb);
 576
 577        pr_debug("rdev %p\n", rdev);
 578        req->cmd = CPL_ABORT_NO_RST;
 579        skb_get(skb);
 580        ret = c4iw_ofld_send(rdev, skb);
 581        if (ret) {
 582                __state_set(&ep->com, DEAD);
 583                queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 584        } else
 585                kfree_skb(skb);
 586}
 587
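/*
 * Send the FW_FLOWC_WR that describes this offloaded connection to the
 * firmware: PF/VF, tx channel, ingress queue, initial send/receive
 * sequence numbers, send buffer, MSS and receive window scale.  A tenth
 * parameter carries the VLAN priority as a scheduling class when the L2T
 * entry has a VLAN tag.
 */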
 588static int send_flowc(struct c4iw_ep *ep)
 589{
 590        struct fw_flowc_wr *flowc;
 591        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 592        u16 vlan = ep->l2t->vlan;
 593        int nparams;
 594        int flowclen, flowclen16;
 595
 596        if (WARN_ON(!skb))
 597                return -ENOMEM;
 598
 599        if (vlan == CPL_L2T_VLAN_NONE)
 600                nparams = 9;
 601        else
 602                nparams = 10;
 603
 604        flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
 605        flowclen16 = DIV_ROUND_UP(flowclen, 16);
 606        flowclen = flowclen16 * 16;
 607
 608        flowc = __skb_put(skb, flowclen);
 609        memset(flowc, 0, flowclen);
 610
 611        flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
 612                                           FW_FLOWC_WR_NPARAMS_V(nparams));
 613        flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
 614                                          FW_WR_FLOWID_V(ep->hwtid));
 615
 616        flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 617        flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
 618                                            (ep->com.dev->rdev.lldi.pf));
 619        flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 620        flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
 621        flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 622        flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
 623        flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 624        flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
 625        flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
 626        flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
 627        flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
 628        flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
 629        flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
 630        flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
 631        flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 632        flowc->mnemval[7].val = cpu_to_be32(ep->emss);
 633        flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
 634        flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
 635        if (nparams == 10) {
 636                u16 pri;
 637                pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 638                flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
 639                flowc->mnemval[9].val = cpu_to_be32(pri);
 640        }
 641
 642        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 643        return c4iw_ofld_send(&ep->com.dev->rdev, skb);
 644}
 645
 646static int send_halfclose(struct c4iw_ep *ep)
 647{
 648        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 649        u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
 650
 651        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 652        if (WARN_ON(!skb))
 653                return -ENOMEM;
 654
 655        cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
 656                              NULL, arp_failure_discard);
 657
 658        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 659}
 660
 661static void read_tcb(struct c4iw_ep *ep)
 662{
 663        struct sk_buff *skb;
 664        struct cpl_get_tcb *req;
 665        int wrlen = roundup(sizeof(*req), 16);
 666
 667        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 668        if (WARN_ON(!skb))
 669                return;
 670
 671        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
 672        req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
 673        memset(req, 0, wrlen);
 674        INIT_TP_WR(req, ep->hwtid);
 675        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
 676        req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
 677
 678        /*
 679         * keep a ref on the ep so the tcb is not unlocked before this
 680         * cpl completes. The ref is released in read_tcb_rpl().
 681         */
 682        c4iw_get_ep(&ep->com);
 683        if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
 684                c4iw_put_ep(&ep->com);
 685}
 686
 687static int send_abort_req(struct c4iw_ep *ep)
 688{
 689        u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 690        struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
 691
 692        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 693        if (WARN_ON(!req_skb))
 694                return -ENOMEM;
 695
 696        cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
 697                          ep, abort_arp_failure);
 698
 699        return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 700}
 701
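/*
 * Abort the connection.  For QPs backed by an SRQ the abort is deferred:
 * ABORT_REQ_IN_PROGRESS is set and the TCB is read back first via
 * read_tcb(); otherwise a CPL_ABORT_REQ is sent immediately.
 */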
 702static int send_abort(struct c4iw_ep *ep)
 703{
 704        if (!ep->com.qp || !ep->com.qp->srq) {
 705                send_abort_req(ep);
 706                return 0;
 707        }
 708        set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
 709        read_tcb(ep);
 710        return 0;
 711}
 712
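/*
 * Build and send the active-open request for this endpoint.  The CPL
 * format is chip- and address-family specific (T4/T5/T6, IPv4/IPv6);
 * opt0/opt2 encode the receive window, MSS index, L2T index, congestion
 * and ECN settings selected from the module parameters above.
 */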
 713static int send_connect(struct c4iw_ep *ep)
 714{
 715        struct cpl_act_open_req *req = NULL;
 716        struct cpl_t5_act_open_req *t5req = NULL;
 717        struct cpl_t6_act_open_req *t6req = NULL;
 718        struct cpl_act_open_req6 *req6 = NULL;
 719        struct cpl_t5_act_open_req6 *t5req6 = NULL;
 720        struct cpl_t6_act_open_req6 *t6req6 = NULL;
 721        struct sk_buff *skb;
 722        u64 opt0;
 723        u32 opt2;
 724        unsigned int mtu_idx;
 725        u32 wscale;
 726        int win, sizev4, sizev6, wrlen;
 727        struct sockaddr_in *la = (struct sockaddr_in *)
 728                                 &ep->com.local_addr;
 729        struct sockaddr_in *ra = (struct sockaddr_in *)
 730                                 &ep->com.remote_addr;
 731        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
 732                                   &ep->com.local_addr;
 733        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
 734                                   &ep->com.remote_addr;
 735        int ret;
 736        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 737        u32 isn = (prandom_u32() & ~7UL) - 1;
 738        struct net_device *netdev;
 739        u64 params;
 740
 741        netdev = ep->com.dev->rdev.lldi.ports[0];
 742
 743        switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 744        case CHELSIO_T4:
 745                sizev4 = sizeof(struct cpl_act_open_req);
 746                sizev6 = sizeof(struct cpl_act_open_req6);
 747                break;
 748        case CHELSIO_T5:
 749                sizev4 = sizeof(struct cpl_t5_act_open_req);
 750                sizev6 = sizeof(struct cpl_t5_act_open_req6);
 751                break;
 752        case CHELSIO_T6:
 753                sizev4 = sizeof(struct cpl_t6_act_open_req);
 754                sizev6 = sizeof(struct cpl_t6_act_open_req6);
 755                break;
 756        default:
 757                pr_err("T%d Chip is not supported\n",
 758                       CHELSIO_CHIP_VERSION(adapter_type));
 759                return -EINVAL;
 760        }
 761
 762        wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
 763                        roundup(sizev4, 16) :
 764                        roundup(sizev6, 16);
 765
 766        pr_debug("ep %p atid %u\n", ep, ep->atid);
 767
 768        skb = get_skb(NULL, wrlen, GFP_KERNEL);
 769        if (!skb) {
 770                pr_err("%s - failed to alloc skb\n", __func__);
 771                return -ENOMEM;
 772        }
 773        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 774
 775        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 776                      enable_tcp_timestamps,
 777                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
 778        wscale = cxgb_compute_wscale(rcv_win);
 779
 780        /*
 781         * Specify the largest window that will fit in opt0. The
 782         * remainder will be specified in the rx_data_ack.
 783         */
 784        win = ep->rcv_win >> 10;
 785        if (win > RCV_BUFSIZ_M)
 786                win = RCV_BUFSIZ_M;
 787
 788        opt0 = (nocong ? NO_CONG_F : 0) |
 789               KEEP_ALIVE_F |
 790               DELACK_F |
 791               WND_SCALE_V(wscale) |
 792               MSS_IDX_V(mtu_idx) |
 793               L2T_IDX_V(ep->l2t->idx) |
 794               TX_CHAN_V(ep->tx_chan) |
 795               SMAC_SEL_V(ep->smac_idx) |
 796               DSCP_V(ep->tos >> 2) |
 797               ULP_MODE_V(ULP_MODE_TCPDDP) |
 798               RCV_BUFSIZ_V(win);
 799        opt2 = RX_CHANNEL_V(0) |
 800               CCTRL_ECN_V(enable_ecn) |
 801               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
 802        if (enable_tcp_timestamps)
 803                opt2 |= TSTAMPS_EN_F;
 804        if (enable_tcp_sack)
 805                opt2 |= SACK_EN_F;
 806        if (wscale && enable_tcp_window_scaling)
 807                opt2 |= WND_SCALE_EN_F;
 808        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
 809                if (peer2peer)
 810                        isn += 4;
 811
 812                opt2 |= T5_OPT_2_VALID_F;
 813                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
 814                opt2 |= T5_ISS_F;
 815        }
 816
 817        params = cxgb4_select_ntuple(netdev, ep->l2t);
 818
 819        if (ep->com.remote_addr.ss_family == AF_INET6)
 820                cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
 821                               (const u32 *)&la6->sin6_addr.s6_addr, 1);
 822
 823        t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
 824
 825        if (ep->com.remote_addr.ss_family == AF_INET) {
 826                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 827                case CHELSIO_T4:
 828                        req = skb_put(skb, wrlen);
 829                        INIT_TP_WR(req, 0);
 830                        break;
 831                case CHELSIO_T5:
 832                        t5req = skb_put(skb, wrlen);
 833                        INIT_TP_WR(t5req, 0);
 834                        req = (struct cpl_act_open_req *)t5req;
 835                        break;
 836                case CHELSIO_T6:
 837                        t6req = skb_put(skb, wrlen);
 838                        INIT_TP_WR(t6req, 0);
 839                        req = (struct cpl_act_open_req *)t6req;
 840                        t5req = (struct cpl_t5_act_open_req *)t6req;
 841                        break;
 842                default:
 843                        pr_err("T%d Chip is not supported\n",
 844                               CHELSIO_CHIP_VERSION(adapter_type));
 845                        ret = -EINVAL;
 846                        goto clip_release;
 847                }
 848
 849                OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
 850                                        ((ep->rss_qid<<14) | ep->atid)));
 851                req->local_port = la->sin_port;
 852                req->peer_port = ra->sin_port;
 853                req->local_ip = la->sin_addr.s_addr;
 854                req->peer_ip = ra->sin_addr.s_addr;
 855                req->opt0 = cpu_to_be64(opt0);
 856
 857                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 858                        req->params = cpu_to_be32(params);
 859                        req->opt2 = cpu_to_be32(opt2);
 860                } else {
 861                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 862                                t5req->params =
 863                                          cpu_to_be64(FILTER_TUPLE_V(params));
 864                                t5req->rsvd = cpu_to_be32(isn);
 865                                pr_debug("snd_isn %u\n", t5req->rsvd);
 866                                t5req->opt2 = cpu_to_be32(opt2);
 867                        } else {
 868                                t6req->params =
 869                                          cpu_to_be64(FILTER_TUPLE_V(params));
 870                                t6req->rsvd = cpu_to_be32(isn);
 871                                pr_debug("snd_isn %u\n", t6req->rsvd);
 872                                t6req->opt2 = cpu_to_be32(opt2);
 873                        }
 874                }
 875        } else {
 876                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 877                case CHELSIO_T4:
 878                        req6 = skb_put(skb, wrlen);
 879                        INIT_TP_WR(req6, 0);
 880                        break;
 881                case CHELSIO_T5:
 882                        t5req6 = skb_put(skb, wrlen);
 883                        INIT_TP_WR(t5req6, 0);
 884                        req6 = (struct cpl_act_open_req6 *)t5req6;
 885                        break;
 886                case CHELSIO_T6:
 887                        t6req6 = skb_put(skb, wrlen);
 888                        INIT_TP_WR(t6req6, 0);
 889                        req6 = (struct cpl_act_open_req6 *)t6req6;
 890                        t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
 891                        break;
 892                default:
 893                        pr_err("T%d Chip is not supported\n",
 894                               CHELSIO_CHIP_VERSION(adapter_type));
 895                        ret = -EINVAL;
 896                        goto clip_release;
 897                }
 898
 899                OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
 900                                        ((ep->rss_qid<<14)|ep->atid)));
 901                req6->local_port = la6->sin6_port;
 902                req6->peer_port = ra6->sin6_port;
 903                req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
 904                req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
 905                req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
 906                req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
 907                req6->opt0 = cpu_to_be64(opt0);
 908
 909                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 910                        req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
 911                                                                      ep->l2t));
 912                        req6->opt2 = cpu_to_be32(opt2);
 913                } else {
 914                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 915                                t5req6->params =
 916                                            cpu_to_be64(FILTER_TUPLE_V(params));
 917                                t5req6->rsvd = cpu_to_be32(isn);
 918                                pr_debug("snd_isn %u\n", t5req6->rsvd);
 919                                t5req6->opt2 = cpu_to_be32(opt2);
 920                        } else {
 921                                t6req6->params =
 922                                            cpu_to_be64(FILTER_TUPLE_V(params));
 923                                t6req6->rsvd = cpu_to_be32(isn);
 924                                pr_debug("snd_isn %u\n", t6req6->rsvd);
 925                                t6req6->opt2 = cpu_to_be32(opt2);
 926                        }
 927
 928                }
 929        }
 930
 931        set_bit(ACT_OPEN_REQ, &ep->com.history);
 932        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 933clip_release:
 934        if (ret && ep->com.remote_addr.ss_family == AF_INET6)
 935                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 936                                   (const u32 *)&la6->sin6_addr.s6_addr, 1);
 937        return ret;
 938}
 939
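/*
 * Send the MPA start ("MPA request") frame as immediate data in a
 * FW_OFLD_TX_DATA_WR.  For MPA revision 2 an mpa_v2_conn_params block
 * (IRD/ORD and the peer-to-peer RTR type) is appended ahead of any
 * private data supplied by the ULP.
 */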
 940static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
 941                        u8 mpa_rev_to_use)
 942{
 943        int mpalen, wrlen, ret;
 944        struct fw_ofld_tx_data_wr *req;
 945        struct mpa_message *mpa;
 946        struct mpa_v2_conn_params mpa_v2_params;
 947
 948        pr_debug("ep %p tid %u pd_len %d\n",
 949                 ep, ep->hwtid, ep->plen);
 950
 951        mpalen = sizeof(*mpa) + ep->plen;
 952        if (mpa_rev_to_use == 2)
 953                mpalen += sizeof(struct mpa_v2_conn_params);
 954        wrlen = roundup(mpalen + sizeof(*req), 16);
 955        skb = get_skb(skb, wrlen, GFP_KERNEL);
 956        if (!skb) {
 957                connect_reply_upcall(ep, -ENOMEM);
 958                return -ENOMEM;
 959        }
 960        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 961
 962        req = skb_put_zero(skb, wrlen);
 963        req->op_to_immdlen = cpu_to_be32(
 964                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
 965                FW_WR_COMPL_F |
 966                FW_WR_IMMDLEN_V(mpalen));
 967        req->flowid_len16 = cpu_to_be32(
 968                FW_WR_FLOWID_V(ep->hwtid) |
 969                FW_WR_LEN16_V(wrlen >> 4));
 970        req->plen = cpu_to_be32(mpalen);
 971        req->tunnel_to_proxy = cpu_to_be32(
 972                FW_OFLD_TX_DATA_WR_FLUSH_F |
 973                FW_OFLD_TX_DATA_WR_SHOVE_F);
 974
 975        mpa = (struct mpa_message *)(req + 1);
 976        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 977
 978        mpa->flags = 0;
 979        if (crc_enabled)
 980                mpa->flags |= MPA_CRC;
 981        if (markers_enabled) {
 982                mpa->flags |= MPA_MARKERS;
 983                ep->mpa_attr.recv_marker_enabled = 1;
 984        } else {
 985                ep->mpa_attr.recv_marker_enabled = 0;
 986        }
 987        if (mpa_rev_to_use == 2)
 988                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
 989
 990        mpa->private_data_size = htons(ep->plen);
 991        mpa->revision = mpa_rev_to_use;
 992        if (mpa_rev_to_use == 1) {
 993                ep->tried_with_mpa_v1 = 1;
 994                ep->retry_with_mpa_v1 = 0;
 995        }
 996
 997        if (mpa_rev_to_use == 2) {
 998                mpa->private_data_size =
 999                        htons(ntohs(mpa->private_data_size) +
1000                              sizeof(struct mpa_v2_conn_params));
1001                pr_debug("initiator ird %u ord %u\n", ep->ird,
1002                         ep->ord);
1003                mpa_v2_params.ird = htons((u16)ep->ird);
1004                mpa_v2_params.ord = htons((u16)ep->ord);
1005
1006                if (peer2peer) {
1007                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1008                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1009                                mpa_v2_params.ord |=
1010                                        htons(MPA_V2_RDMA_WRITE_RTR);
1011                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1012                                mpa_v2_params.ord |=
1013                                        htons(MPA_V2_RDMA_READ_RTR);
1014                }
1015                memcpy(mpa->private_data, &mpa_v2_params,
1016                       sizeof(struct mpa_v2_conn_params));
1017
1018                if (ep->plen)
1019                        memcpy(mpa->private_data +
1020                               sizeof(struct mpa_v2_conn_params),
1021                               ep->mpa_pkt + sizeof(*mpa), ep->plen);
1022        } else
1023                if (ep->plen)
1024                        memcpy(mpa->private_data,
1025                                        ep->mpa_pkt + sizeof(*mpa), ep->plen);
1026
1027        /*
1028         * Reference the mpa skb.  This ensures the data area
1029         * will remain in memory until the hw acks the tx.
1030         * Function fw4_ack() will deref it.
1031         */
1032        skb_get(skb);
1033        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1034        ep->mpa_skb = skb;
1035        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1036        if (ret)
1037                return ret;
1038        start_ep_timer(ep);
1039        __state_set(&ep->com, MPA_REQ_SENT);
1040        ep->mpa_attr.initiator = 1;
1041        ep->snd_seq += mpalen;
1042        return ret;
1043}
1044
1045static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1046{
1047        int mpalen, wrlen;
1048        struct fw_ofld_tx_data_wr *req;
1049        struct mpa_message *mpa;
1050        struct sk_buff *skb;
1051        struct mpa_v2_conn_params mpa_v2_params;
1052
1053        pr_debug("ep %p tid %u pd_len %d\n",
1054                 ep, ep->hwtid, ep->plen);
1055
1056        mpalen = sizeof(*mpa) + plen;
1057        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1058                mpalen += sizeof(struct mpa_v2_conn_params);
1059        wrlen = roundup(mpalen + sizeof(*req), 16);
1060
1061        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1062        if (!skb) {
1063                pr_err("%s - cannot alloc skb!\n", __func__);
1064                return -ENOMEM;
1065        }
1066        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1067
1068        req = skb_put_zero(skb, wrlen);
1069        req->op_to_immdlen = cpu_to_be32(
1070                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1071                FW_WR_COMPL_F |
1072                FW_WR_IMMDLEN_V(mpalen));
1073        req->flowid_len16 = cpu_to_be32(
1074                FW_WR_FLOWID_V(ep->hwtid) |
1075                FW_WR_LEN16_V(wrlen >> 4));
1076        req->plen = cpu_to_be32(mpalen);
1077        req->tunnel_to_proxy = cpu_to_be32(
1078                FW_OFLD_TX_DATA_WR_FLUSH_F |
1079                FW_OFLD_TX_DATA_WR_SHOVE_F);
1080
1081        mpa = (struct mpa_message *)(req + 1);
1082        memset(mpa, 0, sizeof(*mpa));
1083        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1084        mpa->flags = MPA_REJECT;
1085        mpa->revision = ep->mpa_attr.version;
1086        mpa->private_data_size = htons(plen);
1087
1088        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1089                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1090                mpa->private_data_size =
1091                        htons(ntohs(mpa->private_data_size) +
1092                              sizeof(struct mpa_v2_conn_params));
1093                mpa_v2_params.ird = htons(((u16)ep->ird) |
1094                                          (peer2peer ? MPA_V2_PEER2PEER_MODEL :
1095                                           0));
1096                mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1097                                          (p2p_type ==
1098                                           FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1099                                           MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1100                                           FW_RI_INIT_P2PTYPE_READ_REQ ?
1101                                           MPA_V2_RDMA_READ_RTR : 0) : 0));
1102                memcpy(mpa->private_data, &mpa_v2_params,
1103                       sizeof(struct mpa_v2_conn_params));
1104
1105                if (ep->plen)
1106                        memcpy(mpa->private_data +
1107                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1108        } else
1109                if (plen)
1110                        memcpy(mpa->private_data, pdata, plen);
1111
1112        /*
1113         * Reference the mpa skb again.  This ensures the data area
1114         * will remain in memory until the hw acks the tx.
1115         * Function fw4_ack() will deref it.
1116         */
1117        skb_get(skb);
1118        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1119        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1120        ep->mpa_skb = skb;
1121        ep->snd_seq += mpalen;
1122        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1123}
1124
1125static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1126{
1127        int mpalen, wrlen;
1128        struct fw_ofld_tx_data_wr *req;
1129        struct mpa_message *mpa;
1130        struct sk_buff *skb;
1131        struct mpa_v2_conn_params mpa_v2_params;
1132
1133        pr_debug("ep %p tid %u pd_len %d\n",
1134                 ep, ep->hwtid, ep->plen);
1135
1136        mpalen = sizeof(*mpa) + plen;
1137        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1138                mpalen += sizeof(struct mpa_v2_conn_params);
1139        wrlen = roundup(mpalen + sizeof(*req), 16);
1140
1141        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1142        if (!skb) {
1143                pr_err("%s - cannot alloc skb!\n", __func__);
1144                return -ENOMEM;
1145        }
1146        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1147
1148        req = skb_put_zero(skb, wrlen);
1149        req->op_to_immdlen = cpu_to_be32(
1150                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1151                FW_WR_COMPL_F |
1152                FW_WR_IMMDLEN_V(mpalen));
1153        req->flowid_len16 = cpu_to_be32(
1154                FW_WR_FLOWID_V(ep->hwtid) |
1155                FW_WR_LEN16_V(wrlen >> 4));
1156        req->plen = cpu_to_be32(mpalen);
1157        req->tunnel_to_proxy = cpu_to_be32(
1158                FW_OFLD_TX_DATA_WR_FLUSH_F |
1159                FW_OFLD_TX_DATA_WR_SHOVE_F);
1160
1161        mpa = (struct mpa_message *)(req + 1);
1162        memset(mpa, 0, sizeof(*mpa));
1163        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1164        mpa->flags = 0;
1165        if (ep->mpa_attr.crc_enabled)
1166                mpa->flags |= MPA_CRC;
1167        if (ep->mpa_attr.recv_marker_enabled)
1168                mpa->flags |= MPA_MARKERS;
1169        mpa->revision = ep->mpa_attr.version;
1170        mpa->private_data_size = htons(plen);
1171
1172        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1173                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1174                mpa->private_data_size =
1175                        htons(ntohs(mpa->private_data_size) +
1176                              sizeof(struct mpa_v2_conn_params));
1177                mpa_v2_params.ird = htons((u16)ep->ird);
1178                mpa_v2_params.ord = htons((u16)ep->ord);
1179                if (peer2peer && (ep->mpa_attr.p2p_type !=
1180                                        FW_RI_INIT_P2PTYPE_DISABLED)) {
1181                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1182
1183                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1184                                mpa_v2_params.ord |=
1185                                        htons(MPA_V2_RDMA_WRITE_RTR);
1186                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1187                                mpa_v2_params.ord |=
1188                                        htons(MPA_V2_RDMA_READ_RTR);
1189                }
1190
1191                memcpy(mpa->private_data, &mpa_v2_params,
1192                       sizeof(struct mpa_v2_conn_params));
1193
1194                if (ep->plen)
1195                        memcpy(mpa->private_data +
1196                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1197        } else
1198                if (plen)
1199                        memcpy(mpa->private_data, pdata, plen);
1200
1201        /*
1202         * Reference the mpa skb.  This ensures the data area
1203         * will remain in memory until the hw acks the tx.
1204         * Function fw4_ack() will deref it.
1205         */
1206        skb_get(skb);
1207        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1208        ep->mpa_skb = skb;
1209        __state_set(&ep->com, MPA_REP_SENT);
1210        ep->snd_seq += mpalen;
1211        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1212}
1213
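/*
 * CPL_ACT_ESTABLISH: the active-open TCP connection is up.  Move the
 * endpoint from its atid to the hardware tid, record the initial send
 * and receive sequence numbers and the negotiated TCP options, then kick
 * off MPA negotiation by sending the flowc followed by the MPA request.
 */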
1214static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1215{
1216        struct c4iw_ep *ep;
1217        struct cpl_act_establish *req = cplhdr(skb);
1218        unsigned short tcp_opt = ntohs(req->tcp_opt);
1219        unsigned int tid = GET_TID(req);
1220        unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1221        struct tid_info *t = dev->rdev.lldi.tids;
1222        int ret;
1223
1224        ep = lookup_atid(t, atid);
1225
1226        pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
1227                 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1228
1229        mutex_lock(&ep->com.mutex);
1230        dst_confirm(ep->dst);
1231
1232        /* setup the hwtid for this connection */
1233        ep->hwtid = tid;
1234        cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
1235        insert_ep_tid(ep);
1236
1237        ep->snd_seq = be32_to_cpu(req->snd_isn);
1238        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1239        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
1240
1241        set_emss(ep, tcp_opt);
1242
1243        /* dealloc the atid */
1244        xa_erase_irq(&ep->com.dev->atids, atid);
1245        cxgb4_free_atid(t, atid);
1246        set_bit(ACT_ESTAB, &ep->com.history);
1247
1248        /* start MPA negotiation */
1249        ret = send_flowc(ep);
1250        if (ret)
1251                goto err;
1252        if (ep->retry_with_mpa_v1)
1253                ret = send_mpa_req(ep, skb, 1);
1254        else
1255                ret = send_mpa_req(ep, skb, mpa_rev);
1256        if (ret)
1257                goto err;
1258        mutex_unlock(&ep->com.mutex);
1259        return 0;
1260err:
1261        mutex_unlock(&ep->com.mutex);
1262        connect_reply_upcall(ep, -ENOMEM);
1263        c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1264        return 0;
1265}
1266
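/*
 * The *_upcall() helpers below translate driver/hardware events into
 * iw_cm_event notifications delivered through the cm_id's event handler,
 * dropping the cm_id reference where the event terminates the connection.
 */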
1267static void close_complete_upcall(struct c4iw_ep *ep, int status)
1268{
1269        struct iw_cm_event event;
1270
1271        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1272        memset(&event, 0, sizeof(event));
1273        event.event = IW_CM_EVENT_CLOSE;
1274        event.status = status;
1275        if (ep->com.cm_id) {
1276                pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
1277                         ep, ep->com.cm_id, ep->hwtid);
1278                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1279                deref_cm_id(&ep->com);
1280                set_bit(CLOSE_UPCALL, &ep->com.history);
1281        }
1282}
1283
1284static void peer_close_upcall(struct c4iw_ep *ep)
1285{
1286        struct iw_cm_event event;
1287
1288        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1289        memset(&event, 0, sizeof(event));
1290        event.event = IW_CM_EVENT_DISCONNECT;
1291        if (ep->com.cm_id) {
1292                pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
1293                         ep, ep->com.cm_id, ep->hwtid);
1294                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1295                set_bit(DISCONN_UPCALL, &ep->com.history);
1296        }
1297}
1298
1299static void peer_abort_upcall(struct c4iw_ep *ep)
1300{
1301        struct iw_cm_event event;
1302
1303        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1304        memset(&event, 0, sizeof(event));
1305        event.event = IW_CM_EVENT_CLOSE;
1306        event.status = -ECONNRESET;
1307        if (ep->com.cm_id) {
1308                pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
1309                         ep->com.cm_id, ep->hwtid);
1310                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1311                deref_cm_id(&ep->com);
1312                set_bit(ABORT_UPCALL, &ep->com.history);
1313        }
1314}
1315
1316static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1317{
1318        struct iw_cm_event event;
1319
1320        pr_debug("ep %p tid %u status %d\n",
1321                 ep, ep->hwtid, status);
1322        memset(&event, 0, sizeof(event));
1323        event.event = IW_CM_EVENT_CONNECT_REPLY;
1324        event.status = status;
1325        memcpy(&event.local_addr, &ep->com.local_addr,
1326               sizeof(ep->com.local_addr));
1327        memcpy(&event.remote_addr, &ep->com.remote_addr,
1328               sizeof(ep->com.remote_addr));
1329
1330        if ((status == 0) || (status == -ECONNREFUSED)) {
1331                if (!ep->tried_with_mpa_v1) {
1332                        /* this means MPA_v2 is used */
1333                        event.ord = ep->ird;
1334                        event.ird = ep->ord;
1335                        event.private_data_len = ep->plen -
1336                                sizeof(struct mpa_v2_conn_params);
1337                        event.private_data = ep->mpa_pkt +
1338                                sizeof(struct mpa_message) +
1339                                sizeof(struct mpa_v2_conn_params);
1340                } else {
1341                        /* this means MPA_v1 is used */
1342                        event.ord = cur_max_read_depth(ep->com.dev);
1343                        event.ird = cur_max_read_depth(ep->com.dev);
1344                        event.private_data_len = ep->plen;
1345                        event.private_data = ep->mpa_pkt +
1346                                sizeof(struct mpa_message);
1347                }
1348        }
1349
1350        pr_debug("ep %p tid %u status %d\n", ep,
1351                 ep->hwtid, status);
1352        set_bit(CONN_RPL_UPCALL, &ep->com.history);
1353        ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1354
1355        if (status < 0)
1356                deref_cm_id(&ep->com);
1357}
1358
1359static int connect_request_upcall(struct c4iw_ep *ep)
1360{
1361        struct iw_cm_event event;
1362        int ret;
1363
1364        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1365        memset(&event, 0, sizeof(event));
1366        event.event = IW_CM_EVENT_CONNECT_REQUEST;
1367        memcpy(&event.local_addr, &ep->com.local_addr,
1368               sizeof(ep->com.local_addr));
1369        memcpy(&event.remote_addr, &ep->com.remote_addr,
1370               sizeof(ep->com.remote_addr));
1371        event.provider_data = ep;
1372        if (!ep->tried_with_mpa_v1) {
1373                /* this means MPA_v2 is used */
1374                event.ord = ep->ord;
1375                event.ird = ep->ird;
1376                event.private_data_len = ep->plen -
1377                        sizeof(struct mpa_v2_conn_params);
1378                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1379                        sizeof(struct mpa_v2_conn_params);
1380        } else {
1381                /* this means MPA_v1 is used. Send max supported */
1382                event.ord = cur_max_read_depth(ep->com.dev);
1383                event.ird = cur_max_read_depth(ep->com.dev);
1384                event.private_data_len = ep->plen;
1385                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1386        }
1387        c4iw_get_ep(&ep->com);
1388        ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1389                                                      &event);
1390        if (ret)
1391                c4iw_put_ep(&ep->com);
1392        set_bit(CONNREQ_UPCALL, &ep->com.history);
1393        c4iw_put_ep(&ep->parent_ep->com);
1394        return ret;
1395}
1396
1397static void established_upcall(struct c4iw_ep *ep)
1398{
1399        struct iw_cm_event event;
1400
1401        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1402        memset(&event, 0, sizeof(event));
1403        event.event = IW_CM_EVENT_ESTABLISHED;
1404        event.ird = ep->ord;
1405        event.ord = ep->ird;
1406        if (ep->com.cm_id) {
1407                pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1408                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1409                set_bit(ESTAB_UPCALL, &ep->com.history);
1410        }
1411}
1412
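/*
 * Return RX credits to the hardware with a CPL_RX_DATA_ACK.  If the full
 * receive window could not be expressed in the RCV_BUFSIZ field at
 * connection setup, the overage is folded into the first credit update.
 */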
1413static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1414{
1415        struct sk_buff *skb;
1416        u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
1417        u32 credit_dack;
1418
1419        pr_debug("ep %p tid %u credits %u\n",
1420                 ep, ep->hwtid, credits);
1421        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1422        if (!skb) {
1423                pr_err("update_rx_credits - cannot alloc skb!\n");
1424                return 0;
1425        }
1426
1427        /*
1428         * If we couldn't specify the entire rcv window at connection setup
1429         * due to the limit in the number of bits in the RCV_BUFSIZ field,
1430         * then add the overage into the credits returned.
1431         */
1432        if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1433                credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
1434
1435        credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
1436                      RX_DACK_MODE_V(dack_mode);
1437
1438        cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
1439                            credit_dack);
1440
1441        c4iw_ofld_send(&ep->com.dev->rdev, skb);
1442        return credits;
1443}
1444
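/*
 * When RELAXED_IRD_NEGOTIATION is set, an IRD/ORD mismatch with the peer is
 * resolved by adjusting our values to the responder's (subject to the
 * adapter's max_ordird_qp limit) instead of failing the negotiation.
 */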
1445#define RELAXED_IRD_NEGOTIATION 1
1446
1447/*
1448 * process_mpa_reply - process streaming mode MPA reply
1449 *
1450 * Returns:
1451 *
1452 * 0 upon success indicating a connect request was delivered to the ULP
1453 * or the mpa request is incomplete but valid so far.
1454 *
1455 * 1 if a failure requires the caller to close the connection.
1456 *
1457 * 2 if a failure requires the caller to abort the connection.
1458 */
1459static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1460{
1461        struct mpa_message *mpa;
1462        struct mpa_v2_conn_params *mpa_v2_params;
1463        u16 plen;
1464        u16 resp_ird, resp_ord;
1465        u8 rtr_mismatch = 0, insuff_ird = 0;
1466        struct c4iw_qp_attributes attrs;
1467        enum c4iw_qp_attr_mask mask;
1468        int err;
1469        int disconnect = 0;
1470
1471        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1472
1473        /*
1474         * If we get more than the supported amount of private data
1475         * then we must fail this connection.
1476         */
1477        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1478                err = -EINVAL;
1479                goto err_stop_timer;
1480        }
1481
1482        /*
1483         * copy the new data into our accumulation buffer.
1484         */
1485        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1486                                  skb->len);
1487        ep->mpa_pkt_len += skb->len;
1488
1489        /*
1490         * if we don't even have the mpa message, then bail.
1491         */
1492        if (ep->mpa_pkt_len < sizeof(*mpa))
1493                return 0;
1494        mpa = (struct mpa_message *) ep->mpa_pkt;
1495
1496        /* Validate MPA header. */
1497        if (mpa->revision > mpa_rev) {
1498                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1499                       __func__, mpa_rev, mpa->revision);
1500                err = -EPROTO;
1501                goto err_stop_timer;
1502        }
1503        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1504                err = -EPROTO;
1505                goto err_stop_timer;
1506        }
1507
1508        plen = ntohs(mpa->private_data_size);
1509
1510        /*
1511         * Fail if there's too much private data.
1512         */
1513        if (plen > MPA_MAX_PRIVATE_DATA) {
1514                err = -EPROTO;
1515                goto err_stop_timer;
1516        }
1517
1518        /*
1519         * Fail if more data was received than the MPA header plus plen account for.
1520         */
1521        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1522                err = -EPROTO;
1523                goto err_stop_timer;
1524        }
1525
1526        ep->plen = (u8) plen;
1527
1528        /*
1529         * If we don't have all the pdata yet, then bail.
1530         * We'll continue processing when more data arrives.
1531         */
1532        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1533                return 0;
1534
1535        if (mpa->flags & MPA_REJECT) {
1536                err = -ECONNREFUSED;
1537                goto err_stop_timer;
1538        }
1539
1540        /*
1541         * Stop mpa timer.  If it expired, then
1542         * we ignore the MPA reply.  process_timeout()
1543         * will abort the connection.
1544         */
1545        if (stop_ep_timer(ep))
1546                return 0;
1547
1548        /*
1549         * If we get here we have accumulated the entire mpa
1550         * start reply message including private data. And
1551         * the MPA header is valid.
1552         */
1553        __state_set(&ep->com, FPDU_MODE);
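        /* '|' binds before '?:', so CRC is enabled when either the peer
         * requested it or crc_enabled is set.
         */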
1554        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1555        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1556        ep->mpa_attr.version = mpa->revision;
1557        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1558
1559        if (mpa->revision == 2) {
1560                ep->mpa_attr.enhanced_rdma_conn =
1561                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1562                if (ep->mpa_attr.enhanced_rdma_conn) {
1563                        mpa_v2_params = (struct mpa_v2_conn_params *)
1564                                (ep->mpa_pkt + sizeof(*mpa));
1565                        resp_ird = ntohs(mpa_v2_params->ird) &
1566                                MPA_V2_IRD_ORD_MASK;
1567                        resp_ord = ntohs(mpa_v2_params->ord) &
1568                                MPA_V2_IRD_ORD_MASK;
1569                        pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
1570                                 resp_ird, resp_ord, ep->ird, ep->ord);
1571
1572                        /*
1573                         * This is a double-check. Ideally, the checks below are
1574                         * not required since the ird/ord negotiation has already
1575                         * been taken care of in c4iw_accept_cr().
1576                         */
1577                        if (ep->ird < resp_ord) {
1578                                if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1579                                    ep->com.dev->rdev.lldi.max_ordird_qp)
1580                                        ep->ird = resp_ord;
1581                                else
1582                                        insuff_ird = 1;
1583                        } else if (ep->ird > resp_ord) {
1584                                ep->ird = resp_ord;
1585                        }
1586                        if (ep->ord > resp_ird) {
1587                                if (RELAXED_IRD_NEGOTIATION)
1588                                        ep->ord = resp_ird;
1589                                else
1590                                        insuff_ird = 1;
1591                        }
1592                        if (insuff_ird) {
1593                                err = -ENOMEM;
1594                                ep->ird = resp_ord;
1595                                ep->ord = resp_ird;
1596                        }
1597
1598                        if (ntohs(mpa_v2_params->ird) &
1599                                        MPA_V2_PEER2PEER_MODEL) {
1600                                if (ntohs(mpa_v2_params->ord) &
1601                                                MPA_V2_RDMA_WRITE_RTR)
1602                                        ep->mpa_attr.p2p_type =
1603                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1604                                else if (ntohs(mpa_v2_params->ord) &
1605                                                MPA_V2_RDMA_READ_RTR)
1606                                        ep->mpa_attr.p2p_type =
1607                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1608                        }
1609                }
1610        } else if (mpa->revision == 1)
1611                if (peer2peer)
1612                        ep->mpa_attr.p2p_type = p2p_type;
1613
1614        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
1615                 ep->mpa_attr.crc_enabled,
1616                 ep->mpa_attr.recv_marker_enabled,
1617                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1618                 ep->mpa_attr.p2p_type, p2p_type);
1619
1620        /*
1621         * If the responder's RTR does not match that of the initiator, assign
1622         * FW_RI_INIT_P2PTYPE_DISABLED in the mpa attributes so that no RTR is
1623         * generated when moving the QP to RTS state.
1624         * A TERM message will be sent after the QP has moved to RTS state.
1625         */
1626        if ((ep->mpa_attr.version == 2) && peer2peer &&
1627                        (ep->mpa_attr.p2p_type != p2p_type)) {
1628                ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1629                rtr_mismatch = 1;
1630        }
1631
1632        attrs.mpa_attr = ep->mpa_attr;
1633        attrs.max_ird = ep->ird;
1634        attrs.max_ord = ep->ord;
1635        attrs.llp_stream_handle = ep;
1636        attrs.next_state = C4IW_QP_STATE_RTS;
1637
1638        mask = C4IW_QP_ATTR_NEXT_STATE |
1639            C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1640            C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1641
1642        /* bind QP and TID with INIT_WR */
1643        err = c4iw_modify_qp(ep->com.qp->rhp,
1644                             ep->com.qp, mask, &attrs, 1);
1645        if (err)
1646                goto err;
1647
1648        /*
1649         * If the responder's RTR requirement did not match what the initiator
1650         * supports, generate a TERM message.
1651         */
1652        if (rtr_mismatch) {
1653                pr_err("%s: RTR mismatch, sending TERM\n", __func__);
1654                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1655                attrs.ecode = MPA_NOMATCH_RTR;
1656                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1657                attrs.send_term = 1;
1658                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1659                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1660                err = -ENOMEM;
1661                disconnect = 1;
1662                goto out;
1663        }
1664
1665        /*
1666         * Generate a TERM if the initiator IRD is not sufficient for the
1667         * responder-provided ORD. Currently, we behave the same way even when
1668         * the responder-provided IRD is not sufficient for the initiator's
1669         * ORD.
1670         */
1671        if (insuff_ird) {
1672                pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
1673                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1674                attrs.ecode = MPA_INSUFF_IRD;
1675                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1676                attrs.send_term = 1;
1677                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1678                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1679                err = -ENOMEM;
1680                disconnect = 1;
1681                goto out;
1682        }
1683        goto out;
1684err_stop_timer:
1685        stop_ep_timer(ep);
1686err:
1687        disconnect = 2;
1688out:
1689        connect_reply_upcall(ep, err);
1690        return disconnect;
1691}
1692
1693/*
1694 * process_mpa_request - process streaming mode MPA request
1695 *
1696 * Returns:
1697 *
1698 * 0 upon success indicating a connect request was delivered to the ULP
1699 * or the mpa request is incomplete but valid so far.
1700 *
1701 * 1 if a failure requires the caller to close the connection.
1702 *
1703 * 2 if a failure requires the caller to abort the connection.
1704 */
1705static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1706{
1707        struct mpa_message *mpa;
1708        struct mpa_v2_conn_params *mpa_v2_params;
1709        u16 plen;
1710
1711        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1712
1713        /*
1714         * If we get more than the supported amount of private data
1715         * then we must fail this connection.
1716         */
1717        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
1718                goto err_stop_timer;
1719
1720        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1721
1722        /*
1723         * Copy the new data into our accumulation buffer.
1724         */
1725        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1726                                  skb->len);
1727        ep->mpa_pkt_len += skb->len;
1728
1729        /*
1730         * If we don't even have the mpa message, then bail.
1731         * We'll continue processing when more data arrives.
1732         */
1733        if (ep->mpa_pkt_len < sizeof(*mpa))
1734                return 0;
1735
1736        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1737        mpa = (struct mpa_message *) ep->mpa_pkt;
1738
1739        /*
1740         * Validate MPA Header.
1741         */
1742        if (mpa->revision > mpa_rev) {
1743                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1744                       __func__, mpa_rev, mpa->revision);
1745                goto err_stop_timer;
1746        }
1747
1748        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1749                goto err_stop_timer;
1750
1751        plen = ntohs(mpa->private_data_size);
1752
1753        /*
1754         * Fail if there's too much private data.
1755         */
1756        if (plen > MPA_MAX_PRIVATE_DATA)
1757                goto err_stop_timer;
1758
1759        /*
1760         * Fail if more data was received than the MPA header plus plen account for.
1761         */
1762        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1763                goto err_stop_timer;
1764        ep->plen = (u8) plen;
1765
1766        /*
1767         * If we don't have all the pdata yet, then bail.
1768         */
1769        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1770                return 0;
1771
1772        /*
1773         * If we get here we have accumulated the entire mpa
1774         * start request message including private data.
1775         */
1776        ep->mpa_attr.initiator = 0;
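        /* '|' binds before '?:', so CRC is enabled when either the peer
         * requested it or crc_enabled is set.
         */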
1777        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1778        ep->mpa_attr.recv_marker_enabled = markers_enabled;
1779        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1780        ep->mpa_attr.version = mpa->revision;
1781        if (mpa->revision == 1)
1782                ep->tried_with_mpa_v1 = 1;
1783        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1784
1785        if (mpa->revision == 2) {
1786                ep->mpa_attr.enhanced_rdma_conn =
1787                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1788                if (ep->mpa_attr.enhanced_rdma_conn) {
1789                        mpa_v2_params = (struct mpa_v2_conn_params *)
1790                                (ep->mpa_pkt + sizeof(*mpa));
1791                        ep->ird = ntohs(mpa_v2_params->ird) &
1792                                MPA_V2_IRD_ORD_MASK;
1793                        ep->ird = min_t(u32, ep->ird,
1794                                        cur_max_read_depth(ep->com.dev));
1795                        ep->ord = ntohs(mpa_v2_params->ord) &
1796                                MPA_V2_IRD_ORD_MASK;
1797                        ep->ord = min_t(u32, ep->ord,
1798                                        cur_max_read_depth(ep->com.dev));
1799                        pr_debug("initiator ird %u ord %u\n",
1800                                 ep->ird, ep->ord);
1801                        if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1802                                if (peer2peer) {
1803                                        if (ntohs(mpa_v2_params->ord) &
1804                                                        MPA_V2_RDMA_WRITE_RTR)
1805                                                ep->mpa_attr.p2p_type =
1806                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1807                                        else if (ntohs(mpa_v2_params->ord) &
1808                                                        MPA_V2_RDMA_READ_RTR)
1809                                                ep->mpa_attr.p2p_type =
1810                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1811                                }
1812                }
1813        } else if (mpa->revision == 1)
1814                if (peer2peer)
1815                        ep->mpa_attr.p2p_type = p2p_type;
1816
1817        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
1818                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1819                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1820                 ep->mpa_attr.p2p_type);
1821
1822        __state_set(&ep->com, MPA_REQ_RCVD);
1823
1824        /* drive upcall */
1825        mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
1826        if (ep->parent_ep->com.state != DEAD) {
1827                if (connect_request_upcall(ep))
1828                        goto err_unlock_parent;
1829        } else {
1830                goto err_unlock_parent;
1831        }
1832        mutex_unlock(&ep->parent_ep->com.mutex);
1833        return 0;
1834
1835err_unlock_parent:
1836        mutex_unlock(&ep->parent_ep->com.mutex);
1837        goto err_out;
1838err_stop_timer:
1839        (void)stop_ep_timer(ep);
1840err_out:
1841        return 2;
1842}
1843
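/*
 * rx_data - handler for CPL_RX_DATA (streaming mode data)
 *
 * Returns RX credits and steers the payload to process_mpa_reply() or
 * process_mpa_request() depending on the endpoint state.  Streaming data
 * received while in FPDU_MODE is unexpected and moves the QP to TERMINATE.
 */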
1844static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1845{
1846        struct c4iw_ep *ep;
1847        struct cpl_rx_data *hdr = cplhdr(skb);
1848        unsigned int dlen = ntohs(hdr->len);
1849        unsigned int tid = GET_TID(hdr);
1850        __u8 status = hdr->status;
1851        int disconnect = 0;
1852
1853        ep = get_ep_from_tid(dev, tid);
1854        if (!ep)
1855                return 0;
1856        pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
1857        skb_pull(skb, sizeof(*hdr));
1858        skb_trim(skb, dlen);
1859        mutex_lock(&ep->com.mutex);
1860
1861        switch (ep->com.state) {
1862        case MPA_REQ_SENT:
1863                update_rx_credits(ep, dlen);
1864                ep->rcv_seq += dlen;
1865                disconnect = process_mpa_reply(ep, skb);
1866                break;
1867        case MPA_REQ_WAIT:
1868                update_rx_credits(ep, dlen);
1869                ep->rcv_seq += dlen;
1870                disconnect = process_mpa_request(ep, skb);
1871                break;
1872        case FPDU_MODE: {
1873                struct c4iw_qp_attributes attrs;
1874
1875                update_rx_credits(ep, dlen);
1876                if (status)
1877                        pr_err("%s Unexpected streaming data. qpid %u ep %p state %d tid %u status %d\n",
1878                               __func__, ep->com.qp->wq.sq.qid, ep,
1879                               ep->com.state, ep->hwtid,
1880                               status);
1881                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1882                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1883                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1884                disconnect = 1;
1885                break;
1886        }
1887        default:
1888                break;
1889        }
1890        mutex_unlock(&ep->com.mutex);
1891        if (disconnect)
1892                c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
1893        c4iw_put_ep(&ep->com);
1894        return 0;
1895}
1896
1897static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
1898{
1899        enum chip_type adapter_type;
1900
1901        adapter_type = ep->com.dev->rdev.lldi.adapter_type;
1902
1903        /*
1904         * If this TCB had an SRQ buffer cached, then we must complete
1905         * it. For user mode, that means saving the srqidx in the
1906         * user/kernel status page for this QP.  For kernel mode, just
1907         * synthesize the CQE now.
1908         */
1909        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
1910                if (ep->com.qp->ibqp.uobject)
1911                        t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
1912                else
1913                        c4iw_flush_srqidx(ep->com.qp, srqidx);
1914        }
1915}
1916
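/*
 * abort_rpl - handler for CPL_ABORT_RPL_RSS
 *
 * Completes any cached SRQ buffer for the connection and, if the endpoint
 * was ABORTING, marks it DEAD and releases its resources.
 */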
1917static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1918{
1919        u32 srqidx;
1920        struct c4iw_ep *ep;
1921        struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
1922        int release = 0;
1923        unsigned int tid = GET_TID(rpl);
1924
1925        ep = get_ep_from_tid(dev, tid);
1926        if (!ep) {
1927                pr_warn("Abort rpl to freed endpoint\n");
1928                return 0;
1929        }
1930
1931        if (ep->com.qp && ep->com.qp->srq) {
1932                srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
1933                complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
1934        }
1935
1936        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1937        mutex_lock(&ep->com.mutex);
1938        switch (ep->com.state) {
1939        case ABORTING:
1940                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
1941                __state_set(&ep->com, DEAD);
1942                release = 1;
1943                break;
1944        default:
1945                pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
1946                break;
1947        }
1948        mutex_unlock(&ep->com.mutex);
1949
1950        if (release) {
1951                close_complete_upcall(ep, -ECONNRESET);
1952                release_ep_resources(ep);
1953        }
1954        c4iw_put_ep(&ep->com);
1955        return 0;
1956}
1957
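/*
 * send_fw_act_open_req - retry an active open via a FW_OFLD_CONNECTION_WR
 *
 * Used when the initial active open fails with CPL_ERR_TCAM_FULL and the
 * firmware offload-connection path is enabled (see act_open_rpl()).
 */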
1958static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1959{
1960        struct sk_buff *skb;
1961        struct fw_ofld_connection_wr *req;
1962        unsigned int mtu_idx;
1963        u32 wscale;
1964        struct sockaddr_in *sin;
1965        int win;
1966
1967        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1968        req = __skb_put_zero(skb, sizeof(*req));
1969        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1970        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1971        req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1972                                     ep->com.dev->rdev.lldi.ports[0],
1973                                     ep->l2t));
1974        sin = (struct sockaddr_in *)&ep->com.local_addr;
1975        req->le.lport = sin->sin_port;
1976        req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1977        sin = (struct sockaddr_in *)&ep->com.remote_addr;
1978        req->le.pport = sin->sin_port;
1979        req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1980        req->tcb.t_state_to_astid =
1981                        htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1982                        FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1983        req->tcb.cplrxdataack_cplpassacceptrpl =
1984                        htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1985        req->tcb.tx_max = (__force __be32) jiffies;
1986        req->tcb.rcv_adv = htons(1);
1987        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1988                      enable_tcp_timestamps,
1989                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
1990        wscale = cxgb_compute_wscale(rcv_win);
1991
1992        /*
1993         * Specify the largest window that will fit in opt0. The
1994         * remainder will be specified in the rx_data_ack.
1995         */
1996        win = ep->rcv_win >> 10;
1997        if (win > RCV_BUFSIZ_M)
1998                win = RCV_BUFSIZ_M;
1999
2000        req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
2001                (nocong ? NO_CONG_F : 0) |
2002                KEEP_ALIVE_F |
2003                DELACK_F |
2004                WND_SCALE_V(wscale) |
2005                MSS_IDX_V(mtu_idx) |
2006                L2T_IDX_V(ep->l2t->idx) |
2007                TX_CHAN_V(ep->tx_chan) |
2008                SMAC_SEL_V(ep->smac_idx) |
2009                DSCP_V(ep->tos >> 2) |
2010                ULP_MODE_V(ULP_MODE_TCPDDP) |
2011                RCV_BUFSIZ_V(win));
2012        req->tcb.opt2 = (__force __be32) (PACE_V(1) |
2013                TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
2014                RX_CHANNEL_V(0) |
2015                CCTRL_ECN_V(enable_ecn) |
2016                RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
2017        if (enable_tcp_timestamps)
2018                req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
2019        if (enable_tcp_sack)
2020                req->tcb.opt2 |= (__force __be32)SACK_EN_F;
2021        if (wscale && enable_tcp_window_scaling)
2022                req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
2023        req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
2024        req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
2025        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
2026        set_bit(ACT_OFLD_CONN, &ep->com.history);
2027        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2028}
2029
2030/*
2031 * Some of the error codes above implicitly indicate that there is no TID
2032 * allocated with the result of an ACT_OPEN.  We use this predicate to make
2033 * that explicit.
2034 */
2035static inline int act_open_has_tid(int status)
2036{
2037        return (status != CPL_ERR_TCAM_PARITY &&
2038                status != CPL_ERR_TCAM_MISS &&
2039                status != CPL_ERR_TCAM_FULL &&
2040                status != CPL_ERR_CONN_EXIST_SYNRECV &&
2041                status != CPL_ERR_CONN_EXIST);
2042}
2043
2044static char *neg_adv_str(unsigned int status)
2045{
2046        switch (status) {
2047        case CPL_ERR_RTX_NEG_ADVICE:
2048                return "Retransmit timeout";
2049        case CPL_ERR_PERSIST_NEG_ADVICE:
2050                return "Persist timeout";
2051        case CPL_ERR_KEEPALV_NEG_ADVICE:
2052                return "Keepalive timeout";
2053        default:
2054                return "Unknown";
2055        }
2056}
2057
2058static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
2059{
2060        ep->snd_win = snd_win;
2061        ep->rcv_win = rcv_win;
2062        pr_debug("snd_win %d rcv_win %d\n",
2063                 ep->snd_win, ep->rcv_win);
2064}
2065
2066#define ACT_OPEN_RETRY_COUNT 2
2067
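/*
 * import_ep - bind an endpoint to its egress path
 *
 * Resolves the neighbour for the destination, allocates an L2T entry, and
 * derives the MTU, TX channel, SMAC index, queue indices and TCP window
 * sizes from the egress port.  For a loopback neighbour, the netdev that
 * owns the peer address is used instead.
 */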
2068static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
2069                     struct dst_entry *dst, struct c4iw_dev *cdev,
2070                     bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
2071{
2072        struct neighbour *n;
2073        int err, step;
2074        struct net_device *pdev;
2075
2076        n = dst_neigh_lookup(dst, peer_ip);
2077        if (!n)
2078                return -ENODEV;
2079
2080        rcu_read_lock();
2081        err = -ENOMEM;
2082        if (n->dev->flags & IFF_LOOPBACK) {
2083                if (iptype == 4)
2084                        pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
2085                else if (IS_ENABLED(CONFIG_IPV6))
2086                        for_each_netdev(&init_net, pdev) {
2087                                if (ipv6_chk_addr(&init_net,
2088                                                  (struct in6_addr *)peer_ip,
2089                                                  pdev, 1))
2090                                        break;
2091                        }
2092                else
2093                        pdev = NULL;
2094
2095                if (!pdev) {
2096                        err = -ENODEV;
2097                        goto out;
2098                }
2099                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2100                                        n, pdev, rt_tos2priority(tos));
2101                if (!ep->l2t) {
2102                        dev_put(pdev);
2103                        goto out;
2104                }
2105                ep->mtu = pdev->mtu;
2106                ep->tx_chan = cxgb4_port_chan(pdev);
2107                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2108                step = cdev->rdev.lldi.ntxq /
2109                        cdev->rdev.lldi.nchan;
2110                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2111                step = cdev->rdev.lldi.nrxq /
2112                        cdev->rdev.lldi.nchan;
2113                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2114                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2115                        cxgb4_port_idx(pdev) * step];
2116                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2117                dev_put(pdev);
2118        } else {
2119                pdev = get_real_dev(n->dev);
2120                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2121                                        n, pdev, rt_tos2priority(tos));
2122                if (!ep->l2t)
2123                        goto out;
2124                ep->mtu = dst_mtu(dst);
2125                ep->tx_chan = cxgb4_port_chan(pdev);
2126                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2127                step = cdev->rdev.lldi.ntxq /
2128                        cdev->rdev.lldi.nchan;
2129                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2130                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2131                step = cdev->rdev.lldi.nrxq /
2132                        cdev->rdev.lldi.nchan;
2133                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2134                        cxgb4_port_idx(pdev) * step];
2135                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2136
2137                if (clear_mpa_v1) {
2138                        ep->retry_with_mpa_v1 = 0;
2139                        ep->tried_with_mpa_v1 = 0;
2140                }
2141        }
2142        err = 0;
2143out:
2144        rcu_read_unlock();
2145
2146        neigh_release(n);
2147
2148        return err;
2149}
2150
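/*
 * c4iw_reconnect - re-drive an active open on an existing endpoint
 *
 * Replenishes the endpoint's skb list, allocates a fresh atid, re-resolves
 * the route and L2T entry, and re-sends the connect request.  On failure the
 * ULP is given a connect reply error, since it is still waiting on the
 * original connect request.
 */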
2151static int c4iw_reconnect(struct c4iw_ep *ep)
2152{
2153        int err = 0;
2154        int size = 0;
2155        struct sockaddr_in *laddr = (struct sockaddr_in *)
2156                                    &ep->com.cm_id->m_local_addr;
2157        struct sockaddr_in *raddr = (struct sockaddr_in *)
2158                                    &ep->com.cm_id->m_remote_addr;
2159        struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
2160                                      &ep->com.cm_id->m_local_addr;
2161        struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
2162                                      &ep->com.cm_id->m_remote_addr;
2163        int iptype;
2164        __u8 *ra;
2165
2166        pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id);
2167        c4iw_init_wr_wait(ep->com.wr_waitp);
2168
2169        /* When the MPA revision differs between nodes, the node with MPA_rev=2
2170         * retries the connection with MPA_rev 1 for the same EP through
2171         * c4iw_reconnect(), where the same EP is assigned a new tid for
2172         * further connection establishment. Because the same EP pointer is
2173         * reused for the reconnect, some skbs were consumed during the previous
2174         * c4iw_connect(), which would leave the EP with too few skbs for the
2175         * reconnect and cause a crash due to an empty skb list during
2176         * peer_abort(). Re-allocate the skbs that were already consumed.
2177         */
2178        size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
2179        if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
2180                err = -ENOMEM;
2181                goto fail1;
2182        }
2183
2184        /*
2185         * Allocate an active TID to initiate a TCP connection.
2186         */
2187        ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
2188        if (ep->atid == -1) {
2189                pr_err("%s - cannot alloc atid\n", __func__);
2190                err = -ENOMEM;
2191                goto fail2;
2192        }
2193        err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
2194        if (err)
2195                goto fail2a;
2196
2197        /* find a route */
2198        if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2199                ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
2200                                          laddr->sin_addr.s_addr,
2201                                          raddr->sin_addr.s_addr,
2202                                          laddr->sin_port,
2203                                          raddr->sin_port, ep->com.cm_id->tos);
2204                iptype = 4;
2205                ra = (__u8 *)&raddr->sin_addr;
2206        } else {
2207                ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
2208                                           get_real_dev,
2209                                           laddr6->sin6_addr.s6_addr,
2210                                           raddr6->sin6_addr.s6_addr,
2211                                           laddr6->sin6_port,
2212                                           raddr6->sin6_port,
2213                                           ep->com.cm_id->tos,
2214                                           raddr6->sin6_scope_id);
2215                iptype = 6;
2216                ra = (__u8 *)&raddr6->sin6_addr;
2217        }
2218        if (!ep->dst) {
2219                pr_err("%s - cannot find route\n", __func__);
2220                err = -EHOSTUNREACH;
2221                goto fail3;
2222        }
2223        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2224                        ep->com.dev->rdev.lldi.adapter_type,
2225                        ep->com.cm_id->tos);
2226        if (err) {
2227                pr_err("%s - cannot alloc l2e\n", __func__);
2228                goto fail4;
2229        }
2230
2231        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2232                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2233                 ep->l2t->idx);
2234
2235        state_set(&ep->com, CONNECTING);
2236        ep->tos = ep->com.cm_id->tos;
2237
2238        /* send connect request to rnic */
2239        err = send_connect(ep);
2240        if (!err)
2241                goto out;
2242
2243        cxgb4_l2t_release(ep->l2t);
2244fail4:
2245        dst_release(ep->dst);
2246fail3:
2247        xa_erase_irq(&ep->com.dev->atids, ep->atid);
2248fail2a:
2249        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2250fail2:
2251        /*
2252         * Remember to send a notification to the upper layer.
2253         * We get here because the upper layer is not aware that this is a
2254         * re-connect attempt, so it is still waiting for the
2255         * response to the first connect request.
2256         */
2257        connect_reply_upcall(ep, -ECONNRESET);
2258fail1:
2259        c4iw_put_ep(&ep->com);
2260out:
2261        return err;
2262}
2263
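/*
 * act_open_rpl - handler for CPL_ACT_OPEN_RPL
 *
 * Negative advice is counted and ignored.  A TCAM-full status may be retried
 * through send_fw_act_open_req(), and CPL_ERR_CONN_EXIST may be retried via
 * c4iw_reconnect().  Any other failure is reported to the ULP and the atid,
 * dst and L2T entry are released.
 */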
2264static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2265{
2266        struct c4iw_ep *ep;
2267        struct cpl_act_open_rpl *rpl = cplhdr(skb);
2268        unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2269                                      ntohl(rpl->atid_status)));
2270        struct tid_info *t = dev->rdev.lldi.tids;
2271        int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2272        struct sockaddr_in *la;
2273        struct sockaddr_in *ra;
2274        struct sockaddr_in6 *la6;
2275        struct sockaddr_in6 *ra6;
2276        int ret = 0;
2277
2278        ep = lookup_atid(t, atid);
2279        la = (struct sockaddr_in *)&ep->com.local_addr;
2280        ra = (struct sockaddr_in *)&ep->com.remote_addr;
2281        la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2282        ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2283
2284        pr_debug("ep %p atid %u status %u errno %d\n", ep, atid,
2285                 status, status2errno(status));
2286
2287        if (cxgb_is_neg_adv(status)) {
2288                pr_debug("Connection problems for atid %u status %u (%s)\n",
2289                         atid, status, neg_adv_str(status));
2290                ep->stats.connect_neg_adv++;
2291                mutex_lock(&dev->rdev.stats.lock);
2292                dev->rdev.stats.neg_adv++;
2293                mutex_unlock(&dev->rdev.stats.lock);
2294                return 0;
2295        }
2296
2297        set_bit(ACT_OPEN_RPL, &ep->com.history);
2298
2299        /*
2300         * Log interesting failures.
2301         */
2302        switch (status) {
2303        case CPL_ERR_CONN_RESET:
2304        case CPL_ERR_CONN_TIMEDOUT:
2305                break;
2306        case CPL_ERR_TCAM_FULL:
2307                mutex_lock(&dev->rdev.stats.lock);
2308                dev->rdev.stats.tcam_full++;
2309                mutex_unlock(&dev->rdev.stats.lock);
2310                if (ep->com.local_addr.ss_family == AF_INET &&
2311                    dev->rdev.lldi.enable_fw_ofld_conn) {
2312                        ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
2313                                                   ntohl(rpl->atid_status))));
2314                        if (ret)
2315                                goto fail;
2316                        return 0;
2317                }
2318                break;
2319        case CPL_ERR_CONN_EXIST:
2320                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2321                        set_bit(ACT_RETRY_INUSE, &ep->com.history);
2322                        if (ep->com.remote_addr.ss_family == AF_INET6) {
2323                                struct sockaddr_in6 *sin6 =
2324                                                (struct sockaddr_in6 *)
2325                                                &ep->com.local_addr;
2326                                cxgb4_clip_release(
2327                                                ep->com.dev->rdev.lldi.ports[0],
2328                                                (const u32 *)
2329                                                &sin6->sin6_addr.s6_addr, 1);
2330                        }
2331                        xa_erase_irq(&ep->com.dev->atids, atid);
2332                        cxgb4_free_atid(t, atid);
2333                        dst_release(ep->dst);
2334                        cxgb4_l2t_release(ep->l2t);
2335                        c4iw_reconnect(ep);
2336                        return 0;
2337                }
2338                break;
2339        default:
2340                if (ep->com.local_addr.ss_family == AF_INET) {
2341                        pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2342                                atid, status, status2errno(status),
2343                                &la->sin_addr.s_addr, ntohs(la->sin_port),
2344                                &ra->sin_addr.s_addr, ntohs(ra->sin_port));
2345                } else {
2346                        pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2347                                atid, status, status2errno(status),
2348                                la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2349                                ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2350                }
2351                break;
2352        }
2353
2354fail:
2355        connect_reply_upcall(ep, status2errno(status));
2356        state_set(&ep->com, DEAD);
2357
2358        if (ep->com.remote_addr.ss_family == AF_INET6) {
2359                struct sockaddr_in6 *sin6 =
2360                        (struct sockaddr_in6 *)&ep->com.local_addr;
2361                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2362                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2363        }
2364        if (status && act_open_has_tid(status))
2365                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
2366                                 ep->com.local_addr.ss_family);
2367
2368        xa_erase_irq(&ep->com.dev->atids, atid);
2369        cxgb4_free_atid(t, atid);
2370        dst_release(ep->dst);
2371        cxgb4_l2t_release(ep->l2t);
2372        c4iw_put_ep(&ep->com);
2373
2374        return 0;
2375}
2376
2377static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2378{
2379        struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2380        unsigned int stid = GET_TID(rpl);
2381        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2382
2383        if (!ep) {
2384                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2385                goto out;
2386        }
2387        pr_debug("ep %p status %d error %d\n", ep,
2388                 rpl->status, status2errno(rpl->status));
2389        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2390        c4iw_put_ep(&ep->com);
2391out:
2392        return 0;
2393}
2394
2395static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2396{
2397        struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2398        unsigned int stid = GET_TID(rpl);
2399        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2400
2401        if (!ep) {
2402                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2403                goto out;
2404        }
2405        pr_debug("ep %p\n", ep);
2406        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2407        c4iw_put_ep(&ep->com);
2408out:
2409        return 0;
2410}
2411
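/*
 * accept_cr - send CPL_PASS_ACCEPT_RPL for an incoming connect request
 *
 * Builds opt0/opt2 from the negotiated MTU index, window scale, receive
 * window and the tcp-option/ECN module parameters, using the T5+ reply
 * format (which carries an initial send sequence number) on newer adapters.
 */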
2412static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2413                     struct cpl_pass_accept_req *req)
2414{
2415        struct cpl_pass_accept_rpl *rpl;
2416        unsigned int mtu_idx;
2417        u64 opt0;
2418        u32 opt2;
2419        u32 wscale;
2420        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2421        int win;
2422        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2423
2424        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2425        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2426                      enable_tcp_timestamps && req->tcpopt.tstamp,
2427                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
2428        wscale = cxgb_compute_wscale(rcv_win);
2429
2430        /*
2431         * Specify the largest window that will fit in opt0. The
2432         * remainder will be specified in the rx_data_ack.
2433         */
2434        win = ep->rcv_win >> 10;
2435        if (win > RCV_BUFSIZ_M)
2436                win = RCV_BUFSIZ_M;
2437        opt0 = (nocong ? NO_CONG_F : 0) |
2438               KEEP_ALIVE_F |
2439               DELACK_F |
2440               WND_SCALE_V(wscale) |
2441               MSS_IDX_V(mtu_idx) |
2442               L2T_IDX_V(ep->l2t->idx) |
2443               TX_CHAN_V(ep->tx_chan) |
2444               SMAC_SEL_V(ep->smac_idx) |
2445               DSCP_V(ep->tos >> 2) |
2446               ULP_MODE_V(ULP_MODE_TCPDDP) |
2447               RCV_BUFSIZ_V(win);
2448        opt2 = RX_CHANNEL_V(0) |
2449               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2450
2451        if (enable_tcp_timestamps && req->tcpopt.tstamp)
2452                opt2 |= TSTAMPS_EN_F;
2453        if (enable_tcp_sack && req->tcpopt.sack)
2454                opt2 |= SACK_EN_F;
2455        if (wscale && enable_tcp_window_scaling)
2456                opt2 |= WND_SCALE_EN_F;
2457        if (enable_ecn) {
2458                const struct tcphdr *tcph;
2459                u32 hlen = ntohl(req->hdr_len);
2460
2461                if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2462                        tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2463                                IP_HDR_LEN_G(hlen);
2464                else
2465                        tcph = (const void *)(req + 1) +
2466                                T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2467                if (tcph->ece && tcph->cwr)
2468                        opt2 |= CCTRL_ECN_V(1);
2469        }
2470
2471        skb_get(skb);
2472        rpl = cplhdr(skb);
2473        if (!is_t4(adapter_type)) {
2474                BUILD_BUG_ON(sizeof(*rpl5) != roundup(sizeof(*rpl5), 16));
2475                skb_trim(skb, sizeof(*rpl5));
2476                rpl5 = (void *)rpl;
2477                INIT_TP_WR(rpl5, ep->hwtid);
2478        } else {
2479                skb_trim(skb, sizeof(*rpl));
2480                INIT_TP_WR(rpl, ep->hwtid);
2481        }
2482        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
2483                                                    ep->hwtid));
2484
2485        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
2486                u32 isn = (prandom_u32() & ~7UL) - 1;
2487                opt2 |= T5_OPT_2_VALID_F;
2488                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2489                opt2 |= T5_ISS_F;
2490                rpl5 = (void *)rpl;
2491                memset_after(rpl5, 0, iss);
2492                if (peer2peer)
2493                        isn += 4;
2494                rpl5->iss = cpu_to_be32(isn);
2495                pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
2496        }
2497
2498        rpl->opt0 = cpu_to_be64(opt0);
2499        rpl->opt2 = cpu_to_be32(opt2);
2500        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2501        t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
2502
2503        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2504}
2505
2506static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2507{
2508        pr_debug("c4iw_dev %p tid %u\n", dev, hwtid);
2509        skb_trim(skb, sizeof(struct cpl_tid_release));
2510        release_tid(&dev->rdev, hwtid, skb);
2511        return;
2512}
2513
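/*
 * pass_accept_req - handler for CPL_PASS_ACCEPT_REQ
 *
 * Validates the listening endpoint, resolves a route back to the peer,
 * allocates and initializes a child endpoint for the new connection, and
 * answers with accept_cr().  Failures release the hardware TID via
 * reject_cr().
 */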
2514static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2515{
2516        struct c4iw_ep *child_ep = NULL, *parent_ep;
2517        struct cpl_pass_accept_req *req = cplhdr(skb);
2518        unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2519        struct tid_info *t = dev->rdev.lldi.tids;
2520        unsigned int hwtid = GET_TID(req);
2521        struct dst_entry *dst;
2522        __u8 local_ip[16], peer_ip[16];
2523        __be16 local_port, peer_port;
2524        struct sockaddr_in6 *sin6;
2525        int err;
2526        u16 peer_mss = ntohs(req->tcpopt.mss);
2527        int iptype;
2528        unsigned short hdrs;
2529        u8 tos;
2530
2531        parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
2532        if (!parent_ep) {
2533                pr_err("%s connect request on invalid stid %d\n",
2534                       __func__, stid);
2535                goto reject;
2536        }
2537
2538        if (state_read(&parent_ep->com) != LISTEN) {
2539                pr_err("%s - listening ep not in LISTEN\n", __func__);
2540                goto reject;
2541        }
2542
2543        if (parent_ep->com.cm_id->tos_set)
2544                tos = parent_ep->com.cm_id->tos;
2545        else
2546                tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2547
2548        cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
2549                        &iptype, local_ip, peer_ip, &local_port, &peer_port);
2550
2551        /* Find output route */
2552        if (iptype == 4)  {
2553                pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n",
2554                         parent_ep, hwtid,
2555                         local_ip, peer_ip, ntohs(local_port),
2556                         ntohs(peer_port), peer_mss);
2557                dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
2558                                      *(__be32 *)local_ip, *(__be32 *)peer_ip,
2559                                      local_port, peer_port, tos);
2560        } else {
2561                pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n",
2562                         parent_ep, hwtid,
2563                         local_ip, peer_ip, ntohs(local_port),
2564                         ntohs(peer_port), peer_mss);
2565                dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
2566                                local_ip, peer_ip, local_port, peer_port,
2567                                tos,
2568                                ((struct sockaddr_in6 *)
2569                                 &parent_ep->com.local_addr)->sin6_scope_id);
2570        }
2571        if (!dst) {
2572                pr_err("%s - failed to find dst entry!\n", __func__);
2573                goto reject;
2574        }
2575
2576        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2577        if (!child_ep) {
2578                pr_err("%s - failed to allocate ep entry!\n", __func__);
2579                dst_release(dst);
2580                goto reject;
2581        }
2582
2583        err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2584                        parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2585        if (err) {
2586                pr_err("%s - failed to allocate l2t entry!\n", __func__);
2587                dst_release(dst);
2588                kfree(child_ep);
2589                goto reject;
2590        }
2591
2592        hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
2593               sizeof(struct tcphdr) +
2594               ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2595        if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2596                child_ep->mtu = peer_mss + hdrs;
2597
2598        skb_queue_head_init(&child_ep->com.ep_skb_list);
2599        if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
2600                goto fail;
2601
2602        state_set(&child_ep->com, CONNECTING);
2603        child_ep->com.dev = dev;
2604        child_ep->com.cm_id = NULL;
2605
2606        if (iptype == 4) {
2607                struct sockaddr_in *sin = (struct sockaddr_in *)
2608                        &child_ep->com.local_addr;
2609
2610                sin->sin_family = AF_INET;
2611                sin->sin_port = local_port;
2612                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2613
2614                sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2615                sin->sin_family = AF_INET;
2616                sin->sin_port = ((struct sockaddr_in *)
2617                                 &parent_ep->com.local_addr)->sin_port;
2618                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2619
2620                sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2621                sin->sin_family = AF_INET;
2622                sin->sin_port = peer_port;
2623                sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2624        } else {
2625                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2626                sin6->sin6_family = PF_INET6;
2627                sin6->sin6_port = local_port;
2628                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2629
2630                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2631                sin6->sin6_family = PF_INET6;
2632                sin6->sin6_port = ((struct sockaddr_in6 *)
2633                                   &parent_ep->com.local_addr)->sin6_port;
2634                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2635
2636                sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2637                sin6->sin6_family = PF_INET6;
2638                sin6->sin6_port = peer_port;
2639                memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2640        }
2641
2642        c4iw_get_ep(&parent_ep->com);
2643        child_ep->parent_ep = parent_ep;
2644        child_ep->tos = tos;
2645        child_ep->dst = dst;
2646        child_ep->hwtid = hwtid;
2647
2648        pr_debug("tx_chan %u smac_idx %u rss_qid %u\n",
2649                 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2650
2651        timer_setup(&child_ep->timer, ep_timeout, 0);
2652        cxgb4_insert_tid(t, child_ep, hwtid,
2653                         child_ep->com.local_addr.ss_family);
2654        insert_ep_tid(child_ep);
2655        if (accept_cr(child_ep, skb, req)) {
2656                c4iw_put_ep(&parent_ep->com);
2657                release_ep_resources(child_ep);
2658        } else {
2659                set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2660        }
2661        if (iptype == 6) {
2662                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2663                cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2664                               (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2665        }
2666        goto out;
2667fail:
2668        c4iw_put_ep(&child_ep->com);
2669reject:
2670        reject_cr(dev, hwtid, skb);
2671out:
2672        if (parent_ep)
2673                c4iw_put_ep(&parent_ep->com);
2674        return 0;
2675}
2676
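/*
 * pass_establish - handler for CPL_PASS_ESTABLISH
 *
 * Records the TCP sequence numbers and options, moves the endpoint to
 * MPA_REQ_WAIT, starts the endpoint timer and sends the flowc work request.
 */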
2677static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2678{
2679        struct c4iw_ep *ep;
2680        struct cpl_pass_establish *req = cplhdr(skb);
2681        unsigned int tid = GET_TID(req);
2682        int ret;
2683        u16 tcp_opt = ntohs(req->tcp_opt);
2684
2685        ep = get_ep_from_tid(dev, tid);
2686        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2687        ep->snd_seq = be32_to_cpu(req->snd_isn);
2688        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2689        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
2690
2691        pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
2692
2693        set_emss(ep, tcp_opt);
2694
2695        dst_confirm(ep->dst);
2696        mutex_lock(&ep->com.mutex);
2697        ep->com.state = MPA_REQ_WAIT;
2698        start_ep_timer(ep);
2699        set_bit(PASS_ESTAB, &ep->com.history);
2700        ret = send_flowc(ep);
2701        mutex_unlock(&ep->com.mutex);
2702        if (ret)
2703                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2704        c4iw_put_ep(&ep->com);
2705
2706        return 0;
2707}
2708
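/*
 * peer_close - handler for CPL_PEER_CLOSE
 *
 * Drives the endpoint state machine toward CLOSING/MORIBUND/DEAD, moving
 * the QP to CLOSING or IDLE as needed and issuing the appropriate upcall
 * for the current state.
 */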
2709static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2710{
2711        struct cpl_peer_close *hdr = cplhdr(skb);
2712        struct c4iw_ep *ep;
2713        struct c4iw_qp_attributes attrs;
2714        int disconnect = 1;
2715        int release = 0;
2716        unsigned int tid = GET_TID(hdr);
2717        int ret;
2718
2719        ep = get_ep_from_tid(dev, tid);
2720        if (!ep)
2721                return 0;
2722
2723        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2724        dst_confirm(ep->dst);
2725
2726        set_bit(PEER_CLOSE, &ep->com.history);
2727        mutex_lock(&ep->com.mutex);
2728        switch (ep->com.state) {
2729        case MPA_REQ_WAIT:
2730                __state_set(&ep->com, CLOSING);
2731                break;
2732        case MPA_REQ_SENT:
2733                __state_set(&ep->com, CLOSING);
2734                connect_reply_upcall(ep, -ECONNRESET);
2735                break;
2736        case MPA_REQ_RCVD:
2737
2738                /*
2739                 * We're gonna mark this puppy DEAD, but keep
2740                 * the reference on it until the ULP accepts or
2741                 * rejects the CR. Also wake up anyone waiting
2742                 * in rdma connection migration (see c4iw_accept_cr()).
2743                 */
2744                __state_set(&ep->com, CLOSING);
2745                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2746                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2747                break;
2748        case MPA_REP_SENT:
2749                __state_set(&ep->com, CLOSING);
2750                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2751                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2752                break;
2753        case FPDU_MODE:
2754                start_ep_timer(ep);
2755                __state_set(&ep->com, CLOSING);
2756                attrs.next_state = C4IW_QP_STATE_CLOSING;
2757                ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2758                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2759                if (ret != -ECONNRESET) {
2760                        peer_close_upcall(ep);
2761                        disconnect = 1;
2762                }
2763                break;
2764        case ABORTING:
2765                disconnect = 0;
2766                break;
2767        case CLOSING:
2768                __state_set(&ep->com, MORIBUND);
2769                disconnect = 0;
2770                break;
2771        case MORIBUND:
2772                (void)stop_ep_timer(ep);
2773                if (ep->com.cm_id && ep->com.qp) {
2774                        attrs.next_state = C4IW_QP_STATE_IDLE;
2775                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2776                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2777                }
2778                close_complete_upcall(ep, 0);
2779                __state_set(&ep->com, DEAD);
2780                release = 1;
2781                disconnect = 0;
2782                break;
2783        case DEAD:
2784                disconnect = 0;
2785                break;
2786        default:
2787                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2788        }
2789        mutex_unlock(&ep->com.mutex);
2790        if (disconnect)
2791                c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2792        if (release)
2793                release_ep_resources(ep);
2794        c4iw_put_ep(&ep->com);
2795        return 0;
2796}
2797
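/*
 * Finish a deferred peer abort once the SRQ index has been recovered
 * from the TCB: complete the cached SRQ buffers, move the QP to ERROR,
 * deliver the abort upcall and release the endpoint resources.
 */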
2798static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
2799{
2800        complete_cached_srq_buffers(ep, ep->srqe_idx);
2801        if (ep->com.cm_id && ep->com.qp) {
2802                struct c4iw_qp_attributes attrs;
2803
2804                attrs.next_state = C4IW_QP_STATE_ERROR;
2805                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2806                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2807        }
2808        peer_abort_upcall(ep);
2809        release_ep_resources(ep);
2810        c4iw_put_ep(&ep->com);
2811}
2812
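/*
 * Handle CPL_ABORT_REQ_RSS: the connection has been aborted.  Negative
 * advice is only counted.  Otherwise the QP is moved to ERROR where
 * appropriate, an abort reply is returned to the hardware, and the
 * endpoint is either released or, when falling back from a failed MPA
 * v2 exchange, re-connected with MPA v1.
 */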
2813static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2814{
2815        struct cpl_abort_req_rss6 *req = cplhdr(skb);
2816        struct c4iw_ep *ep;
2817        struct sk_buff *rpl_skb;
2818        struct c4iw_qp_attributes attrs;
2819        int ret;
2820        int release = 0;
2821        unsigned int tid = GET_TID(req);
2822        u8 status;
2823        u32 srqidx;
2824
2825        u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
2826
2827        ep = get_ep_from_tid(dev, tid);
2828        if (!ep)
2829                return 0;
2830
2831        status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
2832
2833        if (cxgb_is_neg_adv(status)) {
2834                pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
2835                         ep->hwtid, status, neg_adv_str(status));
2836                ep->stats.abort_neg_adv++;
2837                mutex_lock(&dev->rdev.stats.lock);
2838                dev->rdev.stats.neg_adv++;
2839                mutex_unlock(&dev->rdev.stats.lock);
2840                goto deref_ep;
2841        }
2842
2843        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
2844                 ep->com.state);
2845        set_bit(PEER_ABORT, &ep->com.history);
2846
2847        /*
2848         * Wake up any threads in rdma_init() or rdma_fini().
2849         * However, this is not needed if com state is just
2850         * MPA_REQ_SENT
2851         */
2852        if (ep->com.state != MPA_REQ_SENT)
2853                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2854
2855        mutex_lock(&ep->com.mutex);
2856        switch (ep->com.state) {
2857        case CONNECTING:
2858                c4iw_put_ep(&ep->parent_ep->com);
2859                break;
2860        case MPA_REQ_WAIT:
2861                (void)stop_ep_timer(ep);
2862                break;
2863        case MPA_REQ_SENT:
2864                (void)stop_ep_timer(ep);
2865                if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
2866                    (mpa_rev == 2 && ep->tried_with_mpa_v1))
2867                        connect_reply_upcall(ep, -ECONNRESET);
2868                else {
2869                        /*
2870                         * We just don't send the notification upwards
2871                         * because we want to retry with MPA v1 without the
2872                         * upper layers even knowing it.
2873                         *
2874                         * Do some housekeeping so as to re-initiate the
2875                         * connection.
2876                         */
2877                        pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
2878                                __func__, mpa_rev);
2879                        ep->retry_with_mpa_v1 = 1;
2880                }
2881                break;
2882        case MPA_REP_SENT:
2883                break;
2884        case MPA_REQ_RCVD:
2885                break;
2886        case MORIBUND:
2887        case CLOSING:
2888                stop_ep_timer(ep);
2889                fallthrough;
2890        case FPDU_MODE:
2891                if (ep->com.qp && ep->com.qp->srq) {
2892                        srqidx = ABORT_RSS_SRQIDX_G(
2893                                        be32_to_cpu(req->srqidx_status));
2894                        if (srqidx) {
2895                                complete_cached_srq_buffers(ep, srqidx);
2896                        } else {
2897                                /* Hold ep ref until finish_peer_abort() */
2898                                c4iw_get_ep(&ep->com);
2899                                __state_set(&ep->com, ABORTING);
2900                                set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
2901                                read_tcb(ep);
2902                                break;
2904                        }
2905                }
2906
2907                if (ep->com.cm_id && ep->com.qp) {
2908                        attrs.next_state = C4IW_QP_STATE_ERROR;
2909                        ret = c4iw_modify_qp(ep->com.qp->rhp,
2910                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2911                                     &attrs, 1);
2912                        if (ret)
2913                                pr_err("%s - qp <- error failed!\n", __func__);
2914                }
2915                peer_abort_upcall(ep);
2916                break;
2917        case ABORTING:
2918                break;
2919        case DEAD:
2920                pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2921                mutex_unlock(&ep->com.mutex);
2922                goto deref_ep;
2923        default:
2924                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2925                break;
2926        }
2927        dst_confirm(ep->dst);
2928        if (ep->com.state != ABORTING) {
2929                __state_set(&ep->com, DEAD);
2930                /* we don't release if we want to retry with mpa_v1 */
2931                if (!ep->retry_with_mpa_v1)
2932                        release = 1;
2933        }
2934        mutex_unlock(&ep->com.mutex);
2935
2936        rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
2937        if (WARN_ON(!rpl_skb)) {
2938                release = 1;
2939                goto out;
2940        }
2941
2942        cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
2943
2944        c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2945out:
2946        if (release)
2947                release_ep_resources(ep);
2948        else if (ep->retry_with_mpa_v1) {
2949                if (ep->com.remote_addr.ss_family == AF_INET6) {
2950                        struct sockaddr_in6 *sin6 =
2951                                        (struct sockaddr_in6 *)
2952                                        &ep->com.local_addr;
2953                        cxgb4_clip_release(
2954                                        ep->com.dev->rdev.lldi.ports[0],
2955                                        (const u32 *)&sin6->sin6_addr.s6_addr,
2956                                        1);
2957                }
2958                xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
2959                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
2960                                 ep->com.local_addr.ss_family);
2961                dst_release(ep->dst);
2962                cxgb4_l2t_release(ep->l2t);
2963                c4iw_reconnect(ep);
2964        }
2965
2966deref_ep:
2967        c4iw_put_ep(&ep->com);
2968        /* Dereferencing ep, referenced in peer_abort_intr() */
2969        c4iw_put_ep(&ep->com);
2970        return 0;
2971}
2972
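/*
 * Handle CPL_CLOSE_CON_RPL: the hardware has acknowledged our
 * half-close.  In CLOSING we still wait for the peer (MORIBUND); in
 * MORIBUND the close is complete, so move the QP to IDLE, signal the
 * close-complete upcall and release the endpoint.
 */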
2973static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2974{
2975        struct c4iw_ep *ep;
2976        struct c4iw_qp_attributes attrs;
2977        struct cpl_close_con_rpl *rpl = cplhdr(skb);
2978        int release = 0;
2979        unsigned int tid = GET_TID(rpl);
2980
2981        ep = get_ep_from_tid(dev, tid);
2982        if (!ep)
2983                return 0;
2984
2985        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2986
2987        /* The cm_id may be null if we failed to connect */
2988        mutex_lock(&ep->com.mutex);
2989        set_bit(CLOSE_CON_RPL, &ep->com.history);
2990        switch (ep->com.state) {
2991        case CLOSING:
2992                __state_set(&ep->com, MORIBUND);
2993                break;
2994        case MORIBUND:
2995                (void)stop_ep_timer(ep);
2996                if ((ep->com.cm_id) && (ep->com.qp)) {
2997                        attrs.next_state = C4IW_QP_STATE_IDLE;
2998                        c4iw_modify_qp(ep->com.qp->rhp,
2999                                             ep->com.qp,
3000                                             C4IW_QP_ATTR_NEXT_STATE,
3001                                             &attrs, 1);
3002                }
3003                close_complete_upcall(ep, 0);
3004                __state_set(&ep->com, DEAD);
3005                release = 1;
3006                break;
3007        case ABORTING:
3008        case DEAD:
3009                break;
3010        default:
3011                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3012                break;
3013        }
3014        mutex_unlock(&ep->com.mutex);
3015        if (release)
3016                release_ep_resources(ep);
3017        c4iw_put_ep(&ep->com);
3018        return 0;
3019}
3020
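/*
 * Handle CPL_RDMA_TERMINATE: a TERMINATE was received for this tid.
 * Move the QP to TERMINATE and tear down the connection.
 */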
3021static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
3022{
3023        struct cpl_rdma_terminate *rpl = cplhdr(skb);
3024        unsigned int tid = GET_TID(rpl);
3025        struct c4iw_ep *ep;
3026        struct c4iw_qp_attributes attrs;
3027
3028        ep = get_ep_from_tid(dev, tid);
3029
3030        if (ep) {
3031                if (ep->com.qp) {
3032                        pr_warn("TERM received tid %u qpid %u\n", tid,
3033                                ep->com.qp->wq.sq.qid);
3034                        attrs.next_state = C4IW_QP_STATE_TERMINATE;
3035                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
3036                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
3037                }
3038
3039                /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
3040                 * when entering the TERM state the RNIC MUST initiate a CLOSE.
3041                 */
3042                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3043                c4iw_put_ep(&ep->com);
3044        } else
3045                pr_warn("TERM received tid %u no ep/qp\n", tid);
3046
3047        return 0;
3048}
3049
3050/*
3051 * Upcall from the adapter indicating data has been transmitted.
3052 * For us it's just the single MPA request or reply.  We can now free
3053 * the skb holding the mpa message.
3054 */
3055static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
3056{
3057        struct c4iw_ep *ep;
3058        struct cpl_fw4_ack *hdr = cplhdr(skb);
3059        u8 credits = hdr->credits;
3060        unsigned int tid = GET_TID(hdr);
3061
3063        ep = get_ep_from_tid(dev, tid);
3064        if (!ep)
3065                return 0;
3066        pr_debug("ep %p tid %u credits %u\n",
3067                 ep, ep->hwtid, credits);
3068        if (credits == 0) {
3069                pr_debug("0 credit ack ep %p tid %u state %u\n",
3070                         ep, ep->hwtid, state_read(&ep->com));
3071                goto out;
3072        }
3073
3074        dst_confirm(ep->dst);
3075        if (ep->mpa_skb) {
3076                pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
3077                         ep, ep->hwtid, state_read(&ep->com),
3078                         ep->mpa_attr.initiator ? 1 : 0);
3079                mutex_lock(&ep->com.mutex);
3080                kfree_skb(ep->mpa_skb);
3081                ep->mpa_skb = NULL;
3082                if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3083                        stop_ep_timer(ep);
3084                mutex_unlock(&ep->com.mutex);
3085        }
3086out:
3087        c4iw_put_ep(&ep->com);
3088        return 0;
3089}
3090
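/*
 * iw_cm reject verb: the ULP has rejected an incoming connection
 * request.  If mpa_rev is 0 the connection is simply aborted;
 * otherwise an MPA reject carrying the private data is sent before the
 * endpoint is disconnected.
 */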
3091int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3092{
3093        int abort;
3094        struct c4iw_ep *ep = to_ep(cm_id);
3095
3096        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3097
3098        mutex_lock(&ep->com.mutex);
3099        if (ep->com.state != MPA_REQ_RCVD) {
3100                mutex_unlock(&ep->com.mutex);
3101                c4iw_put_ep(&ep->com);
3102                return -ECONNRESET;
3103        }
3104        set_bit(ULP_REJECT, &ep->com.history);
3105        if (mpa_rev == 0)
3106                abort = 1;
3107        else
3108                abort = send_mpa_reject(ep, pdata, pdata_len);
3109        mutex_unlock(&ep->com.mutex);
3110
3111        stop_ep_timer(ep);
3112        c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
3113        c4iw_put_ep(&ep->com);
3114        return 0;
3115}
3116
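/*
 * iw_cm accept verb: the ULP has accepted an incoming connection
 * request.  Validate and negotiate IRD/ORD, bind the QP to the
 * endpoint, move the QP to RTS and send the MPA reply.  Any failure
 * aborts the connection.
 */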
3117int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3118{
3119        int err;
3120        struct c4iw_qp_attributes attrs;
3121        enum c4iw_qp_attr_mask mask;
3122        struct c4iw_ep *ep = to_ep(cm_id);
3123        struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
3124        struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
3125        int abort = 0;
3126
3127        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3128
3129        mutex_lock(&ep->com.mutex);
3130        if (ep->com.state != MPA_REQ_RCVD) {
3131                err = -ECONNRESET;
3132                goto err_out;
3133        }
3134
3135        if (!qp) {
3136                err = -EINVAL;
3137                goto err_out;
3138        }
3139
3140        set_bit(ULP_ACCEPT, &ep->com.history);
3141        if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
3142            (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
3143                err = -EINVAL;
3144                goto err_abort;
3145        }
3146
3147        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
3148                if (conn_param->ord > ep->ird) {
3149                        if (RELAXED_IRD_NEGOTIATION) {
3150                                conn_param->ord = ep->ird;
3151                        } else {
3152                                ep->ird = conn_param->ird;
3153                                ep->ord = conn_param->ord;
3154                                send_mpa_reject(ep, conn_param->private_data,
3155                                                conn_param->private_data_len);
3156                                err = -ENOMEM;
3157                                goto err_abort;
3158                        }
3159                }
3160                if (conn_param->ird < ep->ord) {
3161                        if (RELAXED_IRD_NEGOTIATION &&
3162                            ep->ord <= h->rdev.lldi.max_ordird_qp) {
3163                                conn_param->ird = ep->ord;
3164                        } else {
3165                                err = -ENOMEM;
3166                                goto err_abort;
3167                        }
3168                }
3169        }
3170        ep->ird = conn_param->ird;
3171        ep->ord = conn_param->ord;
3172
3173        if (ep->mpa_attr.version == 1) {
3174                if (peer2peer && ep->ird == 0)
3175                        ep->ird = 1;
3176        } else {
3177                if (peer2peer &&
3178                    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
3179                    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
3180                        ep->ird = 1;
3181        }
3182
3183        pr_debug("ird %d ord %d\n", ep->ird, ep->ord);
3184
3185        ep->com.cm_id = cm_id;
3186        ref_cm_id(&ep->com);
3187        ep->com.qp = qp;
3188        ref_qp(ep);
3189
3190        /* bind QP to EP and move to RTS */
3191        attrs.mpa_attr = ep->mpa_attr;
3192        attrs.max_ird = ep->ird;
3193        attrs.max_ord = ep->ord;
3194        attrs.llp_stream_handle = ep;
3195        attrs.next_state = C4IW_QP_STATE_RTS;
3196
3197        /* bind QP and TID with INIT_WR */
3198        mask = C4IW_QP_ATTR_NEXT_STATE |
3199                             C4IW_QP_ATTR_LLP_STREAM_HANDLE |
3200                             C4IW_QP_ATTR_MPA_ATTR |
3201                             C4IW_QP_ATTR_MAX_IRD |
3202                             C4IW_QP_ATTR_MAX_ORD;
3203
3204        err = c4iw_modify_qp(ep->com.qp->rhp,
3205                             ep->com.qp, mask, &attrs, 1);
3206        if (err)
3207                goto err_deref_cm_id;
3208
3209        set_bit(STOP_MPA_TIMER, &ep->com.flags);
3210        err = send_mpa_reply(ep, conn_param->private_data,
3211                             conn_param->private_data_len);
3212        if (err)
3213                goto err_deref_cm_id;
3214
3215        __state_set(&ep->com, FPDU_MODE);
3216        established_upcall(ep);
3217        mutex_unlock(&ep->com.mutex);
3218        c4iw_put_ep(&ep->com);
3219        return 0;
3220err_deref_cm_id:
3221        deref_cm_id(&ep->com);
3222err_abort:
3223        abort = 1;
3224err_out:
3225        mutex_unlock(&ep->com.mutex);
3226        if (abort)
3227                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3228        c4iw_put_ep(&ep->com);
3229        return err;
3230}
3231
3232static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3233{
3234        struct in_device *ind;
3235        int found = 0;
3236        struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3237        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3238        const struct in_ifaddr *ifa;
3239
3240        ind = in_dev_get(dev->rdev.lldi.ports[0]);
3241        if (!ind)
3242                return -EADDRNOTAVAIL;
3243        rcu_read_lock();
3244        in_dev_for_each_ifa_rcu(ifa, ind) {
3245                if (ifa->ifa_flags & IFA_F_SECONDARY)
3246                        continue;
3247                laddr->sin_addr.s_addr = ifa->ifa_address;
3248                raddr->sin_addr.s_addr = ifa->ifa_address;
3249                found = 1;
3250                break;
3251        }
3252        rcu_read_unlock();
3253
3254        in_dev_put(ind);
3255        return found ? 0 : -EADDRNOTAVAIL;
3256}
3257
3258static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
3259                      unsigned char banned_flags)
3260{
3261        struct inet6_dev *idev;
3262        int err = -EADDRNOTAVAIL;
3263
3264        rcu_read_lock();
3265        idev = __in6_dev_get(dev);
3266        if (idev != NULL) {
3267                struct inet6_ifaddr *ifp;
3268
3269                read_lock_bh(&idev->lock);
3270                list_for_each_entry(ifp, &idev->addr_list, if_list) {
3271                        if (ifp->scope == IFA_LINK &&
3272                            !(ifp->flags & banned_flags)) {
3273                                memcpy(addr, &ifp->addr, 16);
3274                                err = 0;
3275                                break;
3276                        }
3277                }
3278                read_unlock_bh(&idev->lock);
3279        }
3280        rcu_read_unlock();
3281        return err;
3282}
3283
3284static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3285{
3286        struct in6_addr addr;
3287        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3288        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3289
3290        if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3291                memcpy(la6->sin6_addr.s6_addr, &addr, 16);
3292                memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
3293                return 0;
3294        }
3295        return -EADDRNOTAVAIL;
3296}
3297
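/*
 * iw_cm connect verb: set up an endpoint for an active connection.
 * Allocate an atid, resolve a route and L2T entry to the peer, and
 * send the connect request to the hardware.
 */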
3298int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3299{
3300        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3301        struct c4iw_ep *ep;
3302        int err = 0;
3303        struct sockaddr_in *laddr;
3304        struct sockaddr_in *raddr;
3305        struct sockaddr_in6 *laddr6;
3306        struct sockaddr_in6 *raddr6;
3307        __u8 *ra;
3308        int iptype;
3309
3310        if ((conn_param->ord > cur_max_read_depth(dev)) ||
3311            (conn_param->ird > cur_max_read_depth(dev))) {
3312                err = -EINVAL;
3313                goto out;
3314        }
3315        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3316        if (!ep) {
3317                pr_err("%s - cannot alloc ep\n", __func__);
3318                err = -ENOMEM;
3319                goto out;
3320        }
3321
3322        skb_queue_head_init(&ep->com.ep_skb_list);
3323        if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
3324                err = -ENOMEM;
3325                goto fail1;
3326        }
3327
3328        timer_setup(&ep->timer, ep_timeout, 0);
3329        ep->plen = conn_param->private_data_len;
3330        if (ep->plen)
3331                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3332                       conn_param->private_data, ep->plen);
3333        ep->ird = conn_param->ird;
3334        ep->ord = conn_param->ord;
3335
3336        if (peer2peer && ep->ord == 0)
3337                ep->ord = 1;
3338
3339        ep->com.cm_id = cm_id;
3340        ref_cm_id(&ep->com);
3341        cm_id->provider_data = ep;
3342        ep->com.dev = dev;
3343        ep->com.qp = get_qhp(dev, conn_param->qpn);
3344        if (!ep->com.qp) {
3345                pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3346                err = -EINVAL;
3347                goto fail2;
3348        }
3349        ref_qp(ep);
3350        pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn,
3351                 ep->com.qp, cm_id);
3352
3353        /*
3354         * Allocate an active TID to initiate a TCP connection.
3355         */
3356        ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3357        if (ep->atid == -1) {
3358                pr_err("%s - cannot alloc atid\n", __func__);
3359                err = -ENOMEM;
3360                goto fail2;
3361        }
3362        err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
3363        if (err)
3364                goto fail5;
3365
3366        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3367               sizeof(ep->com.local_addr));
3368        memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3369               sizeof(ep->com.remote_addr));
3370
3371        laddr = (struct sockaddr_in *)&ep->com.local_addr;
3372        raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3373        laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3374        raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3375
3376        if (cm_id->m_remote_addr.ss_family == AF_INET) {
3377                iptype = 4;
3378                ra = (__u8 *)&raddr->sin_addr;
3379
3380                /*
3381                 * Handle loopback requests to INADDR_ANY.
3382                 */
3383                if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3384                        err = pick_local_ipaddrs(dev, cm_id);
3385                        if (err)
3386                                goto fail3;
3387                }
3388
3389                /* find a route */
3390                pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3391                         &laddr->sin_addr, ntohs(laddr->sin_port),
3392                         ra, ntohs(raddr->sin_port));
3393                ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
3394                                          laddr->sin_addr.s_addr,
3395                                          raddr->sin_addr.s_addr,
3396                                          laddr->sin_port,
3397                                          raddr->sin_port, cm_id->tos);
3398        } else {
3399                iptype = 6;
3400                ra = (__u8 *)&raddr6->sin6_addr;
3401
3402                /*
3403                 * Handle loopback requests to INADDR_ANY.
3404                 */
3405                if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3406                        err = pick_local_ip6addrs(dev, cm_id);
3407                        if (err)
3408                                goto fail3;
3409                }
3410
3411                /* find a route */
3412                pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3413                         laddr6->sin6_addr.s6_addr,
3414                         ntohs(laddr6->sin6_port),
3415                         raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3416                ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
3417                                           laddr6->sin6_addr.s6_addr,
3418                                           raddr6->sin6_addr.s6_addr,
3419                                           laddr6->sin6_port,
3420                                           raddr6->sin6_port, cm_id->tos,
3421                                           raddr6->sin6_scope_id);
3422        }
3423        if (!ep->dst) {
3424                pr_err("%s - cannot find route\n", __func__);
3425                err = -EHOSTUNREACH;
3426                goto fail3;
3427        }
3428
3429        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3430                        ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3431        if (err) {
3432                pr_err("%s - cannot alloc l2e\n", __func__);
3433                goto fail4;
3434        }
3435
3436        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3437                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3438                 ep->l2t->idx);
3439
3440        state_set(&ep->com, CONNECTING);
3441        ep->tos = cm_id->tos;
3442
3443        /* send connect request to rnic */
3444        err = send_connect(ep);
3445        if (!err)
3446                goto out;
3447
3448        cxgb4_l2t_release(ep->l2t);
3449fail4:
3450        dst_release(ep->dst);
3451fail3:
3452        xa_erase_irq(&ep->com.dev->atids, ep->atid);
3453fail5:
3454        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3455fail2:
3456        skb_queue_purge(&ep->com.ep_skb_list);
3457        deref_cm_id(&ep->com);
3458fail1:
3459        c4iw_put_ep(&ep->com);
3460out:
3461        return err;
3462}
3463
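/*
 * Start an IPv6 listening server on the given stid.  A CLIP entry is
 * taken for non-wildcard local addresses and released again if the
 * server creation fails.
 */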
3464static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3465{
3466        int err;
3467        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3468                                    &ep->com.local_addr;
3469
3470        if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3471                err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3472                                     (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3473                if (err)
3474                        return err;
3475        }
3476        c4iw_init_wr_wait(ep->com.wr_waitp);
3477        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3478                                   ep->stid, &sin6->sin6_addr,
3479                                   sin6->sin6_port,
3480                                   ep->com.dev->rdev.lldi.rxq_ids[0]);
3481        if (!err)
3482                err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3483                                          ep->com.wr_waitp,
3484                                          0, 0, __func__);
3485        else if (err > 0)
3486                err = net_xmit_errno(err);
3487        if (err) {
3488                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3489                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3490                pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3491                       err, ep->stid,
3492                       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3493        }
3494        return err;
3495}
3496
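/*
 * Start an IPv4 listening server on the given stid.  With firmware
 * offloaded connections enabled a server filter is used, retrying
 * while the hardware returns -EBUSY; otherwise a regular server entry
 * is created and we wait for the firmware reply.
 */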
3497static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3498{
3499        int err;
3500        struct sockaddr_in *sin = (struct sockaddr_in *)
3501                                  &ep->com.local_addr;
3502
3503        if (dev->rdev.lldi.enable_fw_ofld_conn) {
3504                do {
3505                        err = cxgb4_create_server_filter(
3506                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3507                                sin->sin_addr.s_addr, sin->sin_port, 0,
3508                                ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3509                        if (err == -EBUSY) {
3510                                if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3511                                        err = -EIO;
3512                                        break;
3513                                }
3514                                set_current_state(TASK_UNINTERRUPTIBLE);
3515                                schedule_timeout(usecs_to_jiffies(100));
3516                        }
3517                } while (err == -EBUSY);
3518        } else {
3519                c4iw_init_wr_wait(ep->com.wr_waitp);
3520                err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3521                                ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3522                                0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3523                if (!err)
3524                        err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3525                                                  ep->com.wr_waitp,
3526                                                  0, 0, __func__);
3527                else if (err > 0)
3528                        err = net_xmit_errno(err);
3529        }
3530        if (err)
3531                pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
3532                       , err, ep->stid,
3533                       &sin->sin_addr, ntohs(sin->sin_port));
3534        return err;
3535}
3536
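/*
 * iw_cm listen verb: allocate a listening endpoint and a server TID
 * (or server filter TID) and start the IPv4 or IPv6 server.
 */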
3537int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3538{
3539        int err = 0;
3540        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3541        struct c4iw_listen_ep *ep;
3542
3543        might_sleep();
3544
3545        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3546        if (!ep) {
3547                pr_err("%s - cannot alloc ep\n", __func__);
3548                err = -ENOMEM;
3549                goto fail1;
3550        }
3551        skb_queue_head_init(&ep->com.ep_skb_list);
3552        pr_debug("ep %p\n", ep);
3553        ep->com.cm_id = cm_id;
3554        ref_cm_id(&ep->com);
3555        ep->com.dev = dev;
3556        ep->backlog = backlog;
3557        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3558               sizeof(ep->com.local_addr));
3559
3560        /*
3561         * Allocate a server TID.
3562         */
3563        if (dev->rdev.lldi.enable_fw_ofld_conn &&
3564            ep->com.local_addr.ss_family == AF_INET)
3565                ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3566                                             cm_id->m_local_addr.ss_family, ep);
3567        else
3568                ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3569                                            cm_id->m_local_addr.ss_family, ep);
3570
3571        if (ep->stid == -1) {
3572                pr_err("%s - cannot alloc stid\n", __func__);
3573                err = -ENOMEM;
3574                goto fail2;
3575        }
3576        err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
3577        if (err)
3578                goto fail3;
3579
3580        state_set(&ep->com, LISTEN);
3581        if (ep->com.local_addr.ss_family == AF_INET)
3582                err = create_server4(dev, ep);
3583        else
3584                err = create_server6(dev, ep);
3585        if (!err) {
3586                cm_id->provider_data = ep;
3587                goto out;
3588        }
3589        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3590fail3:
3591        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3592                        ep->com.local_addr.ss_family);
3593fail2:
3594        deref_cm_id(&ep->com);
3595        c4iw_put_ep(&ep->com);
3596fail1:
3597out:
3598        return err;
3599}
3600
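/*
 * iw_cm listen-destroy verb: remove the server or server filter,
 * release any CLIP entry and the server TID, and drop the references
 * held on the listening endpoint.
 */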
3601int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3602{
3603        int err;
3604        struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3605
3606        pr_debug("ep %p\n", ep);
3607
3608        might_sleep();
3609        state_set(&ep->com, DEAD);
3610        if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3611            ep->com.local_addr.ss_family == AF_INET) {
3612                err = cxgb4_remove_server_filter(
3613                        ep->com.dev->rdev.lldi.ports[0], ep->stid,
3614                        ep->com.dev->rdev.lldi.rxq_ids[0], false);
3615        } else {
3616                struct sockaddr_in6 *sin6;
3617                c4iw_init_wr_wait(ep->com.wr_waitp);
3618                err = cxgb4_remove_server(
3619                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3620                                ep->com.dev->rdev.lldi.rxq_ids[0],
3621                                ep->com.local_addr.ss_family == AF_INET6);
3622                if (err)
3623                        goto done;
3624                err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
3625                                          0, 0, __func__);
3626                sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3627                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3628                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3629        }
3630        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3631        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3632                        ep->com.local_addr.ss_family);
3633done:
3634        deref_cm_id(&ep->com);
3635        c4iw_put_ep(&ep->com);
3636        return err;
3637}
3638
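/*
 * Initiate a disconnect of the endpoint: abortive (send_abort) when
 * @abrupt is set, otherwise an orderly half-close (send_halfclose).
 * On a fatal device error the endpoint is marked DEAD and its
 * resources are released; if the close/abort cannot be sent the QP is
 * moved to ERROR and the resources are likewise released.
 */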
3639int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3640{
3641        int ret = 0;
3642        int close = 0;
3643        int fatal = 0;
3644        struct c4iw_rdev *rdev;
3645
3646        mutex_lock(&ep->com.mutex);
3647
3648        pr_debug("ep %p state %s, abrupt %d\n", ep,
3649                 states[ep->com.state], abrupt);
3650
3651        /*
3652         * Ref the ep here in case we have fatal errors causing the
3653         * ep to be released and freed.
3654         */
3655        c4iw_get_ep(&ep->com);
3656
3657        rdev = &ep->com.dev->rdev;
3658        if (c4iw_fatal_error(rdev)) {
3659                fatal = 1;
3660                close_complete_upcall(ep, -EIO);
3661                ep->com.state = DEAD;
3662        }
3663        switch (ep->com.state) {
3664        case MPA_REQ_WAIT:
3665        case MPA_REQ_SENT:
3666        case MPA_REQ_RCVD:
3667        case MPA_REP_SENT:
3668        case FPDU_MODE:
3669        case CONNECTING:
3670                close = 1;
3671                if (abrupt)
3672                        ep->com.state = ABORTING;
3673                else {
3674                        ep->com.state = CLOSING;
3675
3676                        /*
3677                         * if we close before we see the fw4_ack() then we fix
3678                         * up the timer state since we're reusing it.
3679                         */
3680                        if (ep->mpa_skb &&
3681                            test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
3682                                clear_bit(STOP_MPA_TIMER, &ep->com.flags);
3683                                stop_ep_timer(ep);
3684                        }
3685                        start_ep_timer(ep);
3686                }
3687                set_bit(CLOSE_SENT, &ep->com.flags);
3688                break;
3689        case CLOSING:
3690                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3691                        close = 1;
3692                        if (abrupt) {
3693                                (void)stop_ep_timer(ep);
3694                                ep->com.state = ABORTING;
3695                        } else
3696                                ep->com.state = MORIBUND;
3697                }
3698                break;
3699        case MORIBUND:
3700        case ABORTING:
3701        case DEAD:
3702                pr_debug("ignoring disconnect ep %p state %u\n",
3703                         ep, ep->com.state);
3704                break;
3705        default:
3706                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3707                break;
3708        }
3709
3710        if (close) {
3711                if (abrupt) {
3712                        set_bit(EP_DISC_ABORT, &ep->com.history);
3713                        ret = send_abort(ep);
3714                } else {
3715                        set_bit(EP_DISC_CLOSE, &ep->com.history);
3716                        ret = send_halfclose(ep);
3717                }
3718                if (ret) {
3719                        set_bit(EP_DISC_FAIL, &ep->com.history);
3720                        if (!abrupt) {
3721                                stop_ep_timer(ep);
3722                                close_complete_upcall(ep, -EIO);
3723                        }
3724                        if (ep->com.qp) {
3725                                struct c4iw_qp_attributes attrs;
3726
3727                                attrs.next_state = C4IW_QP_STATE_ERROR;
3728                                ret = c4iw_modify_qp(ep->com.qp->rhp,
3729                                                     ep->com.qp,
3730                                                     C4IW_QP_ATTR_NEXT_STATE,
3731                                                     &attrs, 1);
3732                                if (ret)
3733                                        pr_err("%s - qp <- error failed!\n",
3734                                               __func__);
3735                        }
3736                        fatal = 1;
3737                }
3738        }
3739        mutex_unlock(&ep->com.mutex);
3740        c4iw_put_ep(&ep->com);
3741        if (fatal)
3742                release_ep_resources(ep);
3743        return ret;
3744}
3745
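/*
 * Handle the firmware reply to an active FW_OFLD_CONNECTION_WR.  On
 * FW_ENOMEM or FW_EADDRINUSE the open is retried a bounded number of
 * times; any other failure is reported to the ULP and the endpoint is
 * torn down.
 */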
3746static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3747                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3748{
3749        struct c4iw_ep *ep;
3750        int atid = be32_to_cpu(req->tid);
3751
3752        ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3753                                           (__force u32) req->tid);
3754        if (!ep)
3755                return;
3756
3757        switch (req->retval) {
3758        case FW_ENOMEM:
3759                set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3760                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3761                        send_fw_act_open_req(ep, atid);
3762                        return;
3763                }
3764                fallthrough;
3765        case FW_EADDRINUSE:
3766                set_bit(ACT_RETRY_INUSE, &ep->com.history);
3767                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3768                        send_fw_act_open_req(ep, atid);
3769                        return;
3770                }
3771                break;
3772        default:
3773                pr_info("%s unexpected ofld conn wr retval %d\n",
3774                       __func__, req->retval);
3775                break;
3776        }
3777        pr_err("active ofld_connect_wr failure %d atid %d\n",
3778               req->retval, atid);
3779        mutex_lock(&dev->rdev.stats.lock);
3780        dev->rdev.stats.act_ofld_conn_fails++;
3781        mutex_unlock(&dev->rdev.stats.lock);
3782        connect_reply_upcall(ep, status2errno(req->retval));
3783        state_set(&ep->com, DEAD);
3784        if (ep->com.remote_addr.ss_family == AF_INET6) {
3785                struct sockaddr_in6 *sin6 =
3786                        (struct sockaddr_in6 *)&ep->com.local_addr;
3787                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3788                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3789        }
3790        xa_erase_irq(&dev->atids, atid);
3791        cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3792        dst_release(ep->dst);
3793        cxgb4_l2t_release(ep->l2t);
3794        c4iw_put_ep(&ep->com);
3795}
3796
3797static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3798                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3799{
3800        struct sk_buff *rpl_skb;
3801        struct cpl_pass_accept_req *cpl;
3802        int ret;
3803
3804        rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3805        if (req->retval) {
3806                pr_err("%s passive open failure %d\n", __func__, req->retval);
3807                mutex_lock(&dev->rdev.stats.lock);
3808                dev->rdev.stats.pas_ofld_conn_fails++;
3809                mutex_unlock(&dev->rdev.stats.lock);
3810                kfree_skb(rpl_skb);
3811        } else {
3812                cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3813                OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3814                                        (__force u32) htonl(
3815                                        (__force u32) req->tid)));
3816                ret = pass_accept_req(dev, rpl_skb);
3817                if (!ret)
3818                        kfree_skb(rpl_skb);
3819        }
3820        return;
3821}
3822
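/*
 * The TCB snapshot returned by CPL_GET_TCB_RPL is an array of
 * big-endian 64-bit words stored in reverse word order, so 32-bit TCB
 * word W lives in tcb[(31 - W) / 2].  These helpers extract 64-bit and
 * 32-bit fields from such a snapshot.
 */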
3823static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
3824{
3825        u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
3826        u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
3827        u64 t;
3828        u32 shift = 32;
3829
3830        t = (thi << shift) | (tlo >> shift);
3831
3832        return t;
3833}
3834
3835static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
3836{
3837        u32 v;
3838        u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
3839
3840        if (word & 0x1)
3841                shift += 32;
3842        v = (t >> shift) & mask;
3843        return v;
3844}
3845
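/*
 * Handle CPL_GET_TCB_RPL: a TCB read issued by read_tcb() has
 * completed.  If TF_RX_PDU_OUT is still set the rq_start value is not
 * yet final and the TCB is re-read (bounded by a retry limit);
 * otherwise the recovered SRQ index is used to finish the pending
 * abort processing.
 */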
3846static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
3847{
3848        struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
3849        __be64 *tcb = (__be64 *)(rpl + 1);
3850        unsigned int tid = GET_TID(rpl);
3851        struct c4iw_ep *ep;
3852        u64 t_flags_64;
3853        u32 rx_pdu_out;
3854
3855        ep = get_ep_from_tid(dev, tid);
3856        if (!ep)
3857                return 0;
3858        /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
3859         * determine if there's a rx PDU feedback event pending.
3860         *
3861         * If that bit is set, it means we'll need to re-read the TCB's
3862         * rq_start value. The final value is the one present in a TCB
3863         * with the TF_RX_PDU_OUT bit cleared.
3864         */
3865
3866        t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
3867        rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
3868
3869        c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
3870        c4iw_put_ep(&ep->com); /* from read_tcb() */
3871
3872        /* If TF_RX_PDU_OUT bit is set, re-read the TCB */
3873        if (rx_pdu_out) {
3874                if (++ep->rx_pdu_out_cnt >= 2) {
3875                        WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
3876                        goto cleanup;
3877                }
3878                read_tcb(ep);
3879                return 0;
3880        }
3881
3882        ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
3883                                          TCB_RQ_START_S);
3884cleanup:
3885        pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
3886
3887        if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
3888                finish_peer_abort(dev, ep);
3889        else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
3890                send_abort_req(ep);
3891        else
3892                WARN_ONCE(1, "unexpected state!");
3893
3894        return 0;
3895}
3896
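/*
 * Handle deferred CPL_FW6_MSG work: dispatch CQE events and route
 * OFLD_CONNECTION_WR replies to the active or passive handler based on
 * the TCP state recorded in the reply.
 */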
3897static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3898{
3899        struct cpl_fw6_msg *rpl = cplhdr(skb);
3900        struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3901
3902        switch (rpl->type) {
3903        case FW6_TYPE_CQE:
3904                c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3905                break;
3906        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3907                req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3908                switch (req->t_state) {
3909                case TCP_SYN_SENT:
3910                        active_ofld_conn_reply(dev, skb, req);
3911                        break;
3912                case TCP_SYN_RECV:
3913                        passive_ofld_conn_reply(dev, skb, req);
3914                        break;
3915                default:
3916                        pr_err("%s unexpected ofld conn wr state %d\n",
3917                               __func__, req->t_state);
3918                        break;
3919                }
3920                break;
3921        }
3922        return 0;
3923}
3924
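/*
 * Rewrite a cpl_rx_pkt carrying a filter-redirected SYN into a
 * cpl_pass_accept_req in place, parsing the TCP options from the SYN,
 * so that it can be fed through the normal passive-open path.
 */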
3925static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
3926{
3927        __be32 l2info;
3928        __be16 hdr_len, vlantag, len;
3929        u16 eth_hdr_len;
3930        int tcp_hdr_len, ip_hdr_len;
3931        u8 intf;
3932        struct cpl_rx_pkt *cpl = cplhdr(skb);
3933        struct cpl_pass_accept_req *req;
3934        struct tcp_options_received tmp_opt;
3935        struct c4iw_dev *dev;
3936        enum chip_type type;
3937
3938        dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3939        /* Store values from cpl_rx_pkt in temporary location. */
3940        vlantag = cpl->vlan;
3941        len = cpl->len;
3942        l2info  = cpl->l2info;
3943        hdr_len = cpl->hdr_len;
3944        intf = cpl->iff;
3945
3946        __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3947
3948        /*
3949         * We need to parse the TCP options from the SYN packet
3950         * to generate the cpl_pass_accept_req.
3951         */
3952        memset(&tmp_opt, 0, sizeof(tmp_opt));
3953        tcp_clear_options(&tmp_opt);
3954        tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
3955
3956        req = __skb_push(skb, sizeof(*req));
3957        memset(req, 0, sizeof(*req));
3958        req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3959                         SYN_MAC_IDX_V(RX_MACIDX_G(
3960                         be32_to_cpu(l2info))) |
3961                         SYN_XACT_MATCH_F);
3962        type = dev->rdev.lldi.adapter_type;
3963        tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3964        ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3965        req->hdr_len =
3966                cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3967        if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3968                eth_hdr_len = is_t4(type) ?
3969                                RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3970                                RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3971                req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3972                                            IP_HDR_LEN_V(ip_hdr_len) |
3973                                            ETH_HDR_LEN_V(eth_hdr_len));
3974        } else { /* T6 and later */
3975                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3976                req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3977                                            T6_IP_HDR_LEN_V(ip_hdr_len) |
3978                                            T6_ETH_HDR_LEN_V(eth_hdr_len));
3979        }
3980        req->vlan = vlantag;
3981        req->len = len;
3982        req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3983                                    PASS_OPEN_TOS_V(tos));
3984        req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3985        if (tmp_opt.wscale_ok)
3986                req->tcpopt.wsf = tmp_opt.snd_wscale;
3987        req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3988        if (tmp_opt.sack_ok)
3989                req->tcpopt.sack = 1;
3990        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3991        return;
3992}
3993
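/*
 * Build and send an FW_OFLD_CONNECTION_WR so the firmware can complete
 * the passive open for a SYN that arrived via a filter.  The original
 * skb is stashed in the cookie and replayed through pass_accept_req()
 * when the firmware reply arrives.
 */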
3994static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
3995                                  __be32 laddr, __be16 lport,
3996                                  __be32 raddr, __be16 rport,
3997                                  u32 rcv_isn, u32 filter, u16 window,
3998                                  u32 rss_qid, u8 port_id)
3999{
4000        struct sk_buff *req_skb;
4001        struct fw_ofld_connection_wr *req;
4002        struct cpl_pass_accept_req *cpl = cplhdr(skb);
4003        int ret;
4004
4005        req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
4006        if (!req_skb)
4007                return;
4008        req = __skb_put_zero(req_skb, sizeof(*req));
4009        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
4010        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
4011        req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
4012        req->le.filter = (__force __be32) filter;
4013        req->le.lport = lport;
4014        req->le.pport = rport;
4015        req->le.u.ipv4.lip = laddr;
4016        req->le.u.ipv4.pip = raddr;
4017        req->tcb.rcv_nxt = htonl(rcv_isn + 1);
4018        req->tcb.rcv_adv = htons(window);
4019        req->tcb.t_state_to_astid =
4020                 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
4021                        FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
4022                        FW_OFLD_CONNECTION_WR_ASTID_V(
4023                        PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));
4024
4025        /*
4026         * We store the qid in opt2 which will be used by the firmware
4027         * to send us the wr response.
4028         */
4029        req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
4030
4031        /*
4032         * We initialize the MSS index in the TCB to 0xF so that
4033         * when the driver sends cpl_pass_accept_rpl the TCB picks
4034         * up the correct value. If this were 0, TP would ignore
4035         * any value > 0 for the MSS index.
4036         */
4037        req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
4038        req->cookie = (uintptr_t)skb;
4039
4040        set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
4041        ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
4042        if (ret < 0) {
4043                pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
4044                       ret);
4045                kfree_skb(skb);
4046                kfree_skb(req_skb);
4047        }
4048}
4049
4050/*
4051 * Handler for CPL_RX_PKT messages. These arrive when a filter is
4052 * being used instead of a server TID to redirect a SYN packet. When
4053 * a packet hits the filter it is redirected to the offload queue and
4054 * the driver tries to establish the connection using a firmware work
4055 * request.
4056 */
4057static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
4058{
4059        int stid;
4060        unsigned int filter;
4061        struct ethhdr *eh = NULL;
4062        struct vlan_ethhdr *vlan_eh = NULL;
4063        struct iphdr *iph;
4064        struct tcphdr *tcph;
4065        struct rss_header *rss = (void *)skb->data;
4066        struct cpl_rx_pkt *cpl = (void *)skb->data;
4067        struct cpl_pass_accept_req *req = (void *)(rss + 1);
4068        struct l2t_entry *e;
4069        struct dst_entry *dst;
4070        struct c4iw_ep *lep = NULL;
4071        u16 window;
4072        struct port_info *pi;
4073        struct net_device *pdev;
4074        u16 rss_qid, eth_hdr_len;
4075        int step;
4076        struct neighbour *neigh;
4077
4078        /* Drop all non-SYN packets */
4079        if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
4080                goto reject;
4081
4082        /*
4083         * Drop all packets which did not hit the filter.
4084         * Unlikely to happen.
4085         */
4086        if (!(rss->filter_hit && rss->filter_tid))
4087                goto reject;
4088
4089        /*
4090         * Calculate the server tid from filter hit index from cpl_rx_pkt.
4091         */
4092        stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
4093
4094        lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
4095        if (!lep) {
4096                pr_warn("%s connect request on invalid stid %d\n",
4097                        __func__, stid);
4098                goto reject;
4099        }
4100
4101        switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
4102        case CHELSIO_T4:
4103                eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4104                break;
4105        case CHELSIO_T5:
4106                eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4107                break;
4108        case CHELSIO_T6:
4109                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4110                break;
4111        default:
4112                pr_err("T%d Chip is not supported\n",
4113                       CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
4114                goto reject;
4115        }
4116
4117        if (eth_hdr_len == ETH_HLEN) {
4118                eh = (struct ethhdr *)(req + 1);
4119                iph = (struct iphdr *)(eh + 1);
4120        } else {
4121                vlan_eh = (struct vlan_ethhdr *)(req + 1);
4122                iph = (struct iphdr *)(vlan_eh + 1);
4123                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
4124        }
4125
4126        if (iph->version != 0x4)
4127                goto reject;
4128
4129        tcph = (struct tcphdr *)(iph + 1);
4130        skb_set_network_header(skb, (void *)iph - (void *)rss);
4131        skb_set_transport_header(skb, (void *)tcph - (void *)rss);
4132        skb_get(skb);
4133
4134        pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n",
4135                 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
4136                 ntohs(tcph->source), iph->tos);
4137
4138        dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
4139                              iph->daddr, iph->saddr, tcph->dest,
4140                              tcph->source, iph->tos);
4141        if (!dst) {
4142                pr_err("%s - failed to find dst entry!\n", __func__);
4143                goto reject;
4144        }
4145        neigh = dst_neigh_lookup_skb(dst, skb);
4146
4147        if (!neigh) {
4148                pr_err("%s - failed to allocate neigh!\n", __func__);
4149                goto free_dst;
4150        }
4151
4152        if (neigh->dev->flags & IFF_LOOPBACK) {
4153                pdev = ip_dev_find(&init_net, iph->daddr);
4154                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4155                                    pdev, 0);
4156                pi = (struct port_info *)netdev_priv(pdev);
4157                dev_put(pdev);
4158        } else {
4159                pdev = get_real_dev(neigh->dev);
4160                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4161                                        pdev, 0);
4162                pi = (struct port_info *)netdev_priv(pdev);
4163        }
4164        neigh_release(neigh);
4165        if (!e) {
4166                pr_err("%s - failed to allocate l2t entry!\n",
4167                       __func__);
4168                goto free_dst;
4169        }
4170
4171        step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
4172        rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
4173        window = (__force u16) htons((__force u16)tcph->window);
4174
 4175        /* Calculate the filter portion for the LE region. */
4176        filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
4177                                                    dev->rdev.lldi.ports[0],
4178                                                    e));
4179
4180        /*
4181         * Synthesize the cpl_pass_accept_req. We have everything except the
4182         * TID. Once firmware sends a reply with TID we update the TID field
4183         * in cpl and pass it through the regular cpl_pass_accept_req path.
4184         */
4185        build_cpl_pass_accept_req(skb, stid, iph->tos);
4186        send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
4187                              tcph->source, ntohl(tcph->seq), filter, window,
4188                              rss_qid, pi->port_id);
4189        cxgb4_l2t_release(e);
4190free_dst:
4191        dst_release(dst);
4192reject:
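        /* Drop the reference taken by get_ep_from_stid(). */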
4193        if (lep)
4194                c4iw_put_ep(&lep->com);
4195        return 0;
4196}
4197
4198/*
4199 * These are the real handlers that are called from a
4200 * work queue.
4201 */
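/*
 * The FAKE_CPL_* entries are driver-internal opcodes beyond the hardware CPL
 * range; they are used to defer the final endpoint put to this work queue.
 */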
4202static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
4203        [CPL_ACT_ESTABLISH] = act_establish,
4204        [CPL_ACT_OPEN_RPL] = act_open_rpl,
4205        [CPL_RX_DATA] = rx_data,
4206        [CPL_ABORT_RPL_RSS] = abort_rpl,
4207        [CPL_ABORT_RPL] = abort_rpl,
4208        [CPL_PASS_OPEN_RPL] = pass_open_rpl,
4209        [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
4210        [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
4211        [CPL_PASS_ESTABLISH] = pass_establish,
4212        [CPL_PEER_CLOSE] = peer_close,
4213        [CPL_ABORT_REQ_RSS] = peer_abort,
4214        [CPL_CLOSE_CON_RPL] = close_con_rpl,
4215        [CPL_RDMA_TERMINATE] = terminate,
4216        [CPL_FW4_ACK] = fw4_ack,
4217        [CPL_GET_TCB_RPL] = read_tcb_rpl,
4218        [CPL_FW6_MSG] = deferred_fw6_msg,
4219        [CPL_RX_PKT] = rx_pkt,
4220        [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
4221        [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
4222};
4223
4224static void process_timeout(struct c4iw_ep *ep)
4225{
4226        struct c4iw_qp_attributes attrs;
4227        int abort = 1;
4228
4229        mutex_lock(&ep->com.mutex);
4230        pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
4231        set_bit(TIMEDOUT, &ep->com.history);
4232        switch (ep->com.state) {
4233        case MPA_REQ_SENT:
4234                connect_reply_upcall(ep, -ETIMEDOUT);
4235                break;
4236        case MPA_REQ_WAIT:
4237        case MPA_REQ_RCVD:
4238        case MPA_REP_SENT:
4239        case FPDU_MODE:
4240                break;
4241        case CLOSING:
4242        case MORIBUND:
4243                if (ep->com.cm_id && ep->com.qp) {
4244                        attrs.next_state = C4IW_QP_STATE_ERROR;
4245                        c4iw_modify_qp(ep->com.qp->rhp,
4246                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
4247                                     &attrs, 1);
4248                }
4249                close_complete_upcall(ep, -ETIMEDOUT);
4250                break;
4251        case ABORTING:
4252        case DEAD:
4253
4254                /*
4255                 * These states are expected if the ep timed out at the same
4256                 * time as another thread was calling stop_ep_timer().
4257                 * So we silently do nothing for these states.
4258                 */
4259                abort = 0;
4260                break;
4261        default:
4262                WARN(1, "%s unexpected state ep %p tid %u state %u\n",
4263                        __func__, ep, ep->hwtid, ep->com.state);
4264                abort = 0;
4265        }
4266        mutex_unlock(&ep->com.mutex);
4267        if (abort)
4268                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
4269        c4iw_put_ep(&ep->com);
4270}
4271
4272static void process_timedout_eps(void)
4273{
4274        struct c4iw_ep *ep;
4275
4276        spin_lock_irq(&timeout_lock);
4277        while (!list_empty(&timeout_list)) {
4278                struct list_head *tmp;
4279
4280                tmp = timeout_list.next;
4281                list_del(tmp);
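                /*
                 * Clear the list pointers so ep_timeout() sees this ep as
                 * no longer queued.
                 */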
4282                tmp->next = NULL;
4283                tmp->prev = NULL;
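                /*
                 * Drop the lock while handling the timeout: process_timeout()
                 * takes ep->com.mutex and may sleep.
                 */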
4284                spin_unlock_irq(&timeout_lock);
4285                ep = list_entry(tmp, struct c4iw_ep, entry);
4286                process_timeout(ep);
4287                spin_lock_irq(&timeout_lock);
4288        }
4289        spin_unlock_irq(&timeout_lock);
4290}
4291
4292static void process_work(struct work_struct *work)
4293{
4294        struct sk_buff *skb = NULL;
4295        struct c4iw_dev *dev;
4296        struct cpl_act_establish *rpl;
4297        unsigned int opcode;
4298        int ret;
4299
4300        process_timedout_eps();
4301        while ((skb = skb_dequeue(&rxq))) {
4302                rpl = cplhdr(skb);
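                /* sched() stashed the c4iw_dev pointer at offset sizeof(void *) in skb->cb. */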
4303                dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
4304                opcode = rpl->ot.opcode;
4305
4306                if (opcode >= ARRAY_SIZE(work_handlers) ||
4307                    !work_handlers[opcode]) {
4308                        pr_err("No handler for opcode 0x%x.\n", opcode);
4309                        kfree_skb(skb);
4310                } else {
4311                        ret = work_handlers[opcode](dev, skb);
4312                        if (!ret)
4313                                kfree_skb(skb);
4314                }
4315                process_timedout_eps();
4316        }
4317}
4318
4319static DECLARE_WORK(skb_work, process_work);
4320
4321static void ep_timeout(struct timer_list *t)
4322{
4323        struct c4iw_ep *ep = from_timer(ep, t, timer);
4324        int kickit = 0;
4325
4326        spin_lock(&timeout_lock);
4327        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
4328                /*
4329                 * Only insert if it is not already on the list.
4330                 */
4331                if (!ep->entry.next) {
4332                        list_add_tail(&ep->entry, &timeout_list);
4333                        kickit = 1;
4334                }
4335        }
4336        spin_unlock(&timeout_lock);
4337        if (kickit)
4338                queue_work(workq, &skb_work);
4339}
4340
4341/*
 4342 * All the CM events are handled on a work queue so that they run in a
 4343 * context where it is safe to sleep.
4343 */
4344static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
4345{
4346
4347        /*
4348         * Save dev in the skb->cb area.
4349         */
4350        *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
4351
4352        /*
4353         * Queue the skb and schedule the worker thread.
4354         */
4355        skb_queue_tail(&rxq, skb);
4356        queue_work(workq, &skb_work);
4357        return 0;
4358}
4359
4360static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
4361{
4362        struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
4363
4364        if (rpl->status != CPL_ERR_NONE) {
4365                pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
4366                       rpl->status, GET_TID(rpl));
4367        }
4368        kfree_skb(skb);
4369        return 0;
4370}
4371
4372static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
4373{
4374        struct cpl_fw6_msg *rpl = cplhdr(skb);
4375        struct c4iw_wr_wait *wr_waitp;
4376        int ret;
4377
4378        pr_debug("type %u\n", rpl->type);
4379
4380        switch (rpl->type) {
4381        case FW6_TYPE_WR_RPL:
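                /*
                 * For work-request replies, data[0] carries the completion
                 * status in its second byte and data[1] carries the
                 * c4iw_wr_wait pointer for this reply.
                 */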
4382                ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
4383                wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
4384                pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret);
4385                if (wr_waitp)
4386                        c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0);
4387                kfree_skb(skb);
4388                break;
4389        case FW6_TYPE_CQE:
4390        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
4391                sched(dev, skb);
4392                break;
4393        default:
4394                pr_err("%s unexpected fw6 msg type %u\n",
4395                       __func__, rpl->type);
4396                kfree_skb(skb);
4397                break;
4398        }
4399        return 0;
4400}
4401
4402static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4403{
4404        struct cpl_abort_req_rss *req = cplhdr(skb);
4405        struct c4iw_ep *ep;
4406        unsigned int tid = GET_TID(req);
4407
4408        ep = get_ep_from_tid(dev, tid);
4409        /* This EP will be dereferenced in peer_abort() */
4410        if (!ep) {
4411                pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
4412                kfree_skb(skb);
4413                return 0;
4414        }
4415        if (cxgb_is_neg_adv(req->status)) {
 4416                pr_debug("Negative advice on abort - tid %u status %d (%s)\n",
4417                         ep->hwtid, req->status,
4418                         neg_adv_str(req->status));
4419                goto out;
4420        }
4421        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
4422
4423        c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
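        /*
         * Wake any thread waiting on this endpoint right away with
         * -ECONNRESET; the full abort handling happens later in peer_abort()
         * on the work queue.
         */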
4424out:
4425        sched(dev, skb);
4426        return 0;
4427}
4428
4429/*
4430 * Most upcalls from the T4 Core go to sched() to
4431 * schedule the processing on a work queue.
4432 */
4433c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
4434        [CPL_ACT_ESTABLISH] = sched,
4435        [CPL_ACT_OPEN_RPL] = sched,
4436        [CPL_RX_DATA] = sched,
4437        [CPL_ABORT_RPL_RSS] = sched,
4438        [CPL_ABORT_RPL] = sched,
4439        [CPL_PASS_OPEN_RPL] = sched,
4440        [CPL_CLOSE_LISTSRV_RPL] = sched,
4441        [CPL_PASS_ACCEPT_REQ] = sched,
4442        [CPL_PASS_ESTABLISH] = sched,
4443        [CPL_PEER_CLOSE] = sched,
4444        [CPL_CLOSE_CON_RPL] = sched,
4445        [CPL_ABORT_REQ_RSS] = peer_abort_intr,
4446        [CPL_RDMA_TERMINATE] = sched,
4447        [CPL_FW4_ACK] = sched,
4448        [CPL_SET_TCB_RPL] = set_tcb_rpl,
4449        [CPL_GET_TCB_RPL] = sched,
4450        [CPL_FW6_MSG] = fw6_msg,
4451        [CPL_RX_PKT] = sched
4452};
4453
4454int __init c4iw_cm_init(void)
4455{
4456        skb_queue_head_init(&rxq);
4457
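        /*
         * An ordered workqueue processes one item at a time, preserving the
         * order in which CPL messages were queued; WQ_MEM_RECLAIM provides a
         * rescuer thread so the queue can make progress under memory
         * pressure.
         */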
4458        workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
4459        if (!workq)
4460                return -ENOMEM;
4461
4462        return 0;
4463}
4464
4465void c4iw_cm_term(void)
4466{
4467        WARN_ON(!list_empty(&timeout_list));
4468        destroy_workqueue(workq);
4469}
4470