linux/drivers/infiniband/hw/cxgb4/cm.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/list.h>
  34#include <linux/workqueue.h>
  35#include <linux/skbuff.h>
  36#include <linux/timer.h>
  37#include <linux/notifier.h>
  38#include <linux/inetdevice.h>
  39#include <linux/ip.h>
  40#include <linux/tcp.h>
  41#include <linux/if_vlan.h>
  42
  43#include <net/neighbour.h>
  44#include <net/netevent.h>
  45#include <net/route.h>
  46#include <net/tcp.h>
  47#include <net/ip6_route.h>
  48#include <net/addrconf.h>
  49
  50#include <rdma/ib_addr.h>
  51
  52#include <libcxgb_cm.h>
  53#include "iw_cxgb4.h"
  54#include "clip_tbl.h"
  55
  56static char *states[] = {
  57        "idle",
  58        "listen",
  59        "connecting",
  60        "mpa_wait_req",
  61        "mpa_req_sent",
  62        "mpa_req_rcvd",
  63        "mpa_rep_sent",
  64        "fpdu_mode",
  65        "aborting",
  66        "closing",
  67        "moribund",
  68        "dead",
  69        NULL,
  70};
  71
  72static int nocong;
  73module_param(nocong, int, 0644);
  74MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)");
  75
  76static int enable_ecn;
  77module_param(enable_ecn, int, 0644);
  78MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
  79
  80static int dack_mode = 1;
  81module_param(dack_mode, int, 0644);
  82MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
  83
  84uint c4iw_max_read_depth = 32;
  85module_param(c4iw_max_read_depth, int, 0644);
  86MODULE_PARM_DESC(c4iw_max_read_depth,
  87                 "Per-connection max ORD/IRD (default=32)");
  88
  89static int enable_tcp_timestamps;
  90module_param(enable_tcp_timestamps, int, 0644);
  91MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
  92
  93static int enable_tcp_sack;
  94module_param(enable_tcp_sack, int, 0644);
  95MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
  96
  97static int enable_tcp_window_scaling = 1;
  98module_param(enable_tcp_window_scaling, int, 0644);
  99MODULE_PARM_DESC(enable_tcp_window_scaling,
 100                 "Enable tcp window scaling (default=1)");
 101
 102static int peer2peer = 1;
 103module_param(peer2peer, int, 0644);
 104MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
 105
 106static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 107module_param(p2p_type, int, 0644);
 108MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
 109                           "1=RDMA_READ 0=RDMA_WRITE (default 1)");
 110
 111static int ep_timeout_secs = 60;
 112module_param(ep_timeout_secs, int, 0644);
 113MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
 114                                   "in seconds (default=60)");
 115
 116static int mpa_rev = 2;
 117module_param(mpa_rev, int, 0644);
 118MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
 119                "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
 120                " compliant (default=2)");
 121
 122static int markers_enabled;
 123module_param(markers_enabled, int, 0644);
 124MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
 125
 126static int crc_enabled = 1;
 127module_param(crc_enabled, int, 0644);
 128MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
 129
 130static int rcv_win = 256 * 1024;
 131module_param(rcv_win, int, 0644);
 132MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
 133
 134static int snd_win = 128 * 1024;
 135module_param(snd_win, int, 0644);
 136MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
 137
 138static struct workqueue_struct *workq;
 139
 140static struct sk_buff_head rxq;
 141
 142static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
 143static void ep_timeout(struct timer_list *t);
 144static void connect_reply_upcall(struct c4iw_ep *ep, int status);
 145static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
 146
 147static LIST_HEAD(timeout_list);
 148static spinlock_t timeout_lock;
 149
 150static void deref_cm_id(struct c4iw_ep_common *epc)
 151{
 152        epc->cm_id->rem_ref(epc->cm_id);
 153        epc->cm_id = NULL;
 154        set_bit(CM_ID_DEREFED, &epc->history);
 155}
 156
 157static void ref_cm_id(struct c4iw_ep_common *epc)
 158{
 159        set_bit(CM_ID_REFED, &epc->history);
 160        epc->cm_id->add_ref(epc->cm_id);
 161}
 162
 163static void deref_qp(struct c4iw_ep *ep)
 164{
 165        c4iw_qp_rem_ref(&ep->com.qp->ibqp);
 166        clear_bit(QP_REFERENCED, &ep->com.flags);
 167        set_bit(QP_DEREFED, &ep->com.history);
 168}
 169
 170static void ref_qp(struct c4iw_ep *ep)
 171{
 172        set_bit(QP_REFERENCED, &ep->com.flags);
 173        set_bit(QP_REFED, &ep->com.history);
 174        c4iw_qp_add_ref(&ep->com.qp->ibqp);
 175}
 176
 177static void start_ep_timer(struct c4iw_ep *ep)
 178{
 179        pr_debug("ep %p\n", ep);
 180        if (timer_pending(&ep->timer)) {
 181                pr_err("%s timer already started! ep %p\n",
 182                       __func__, ep);
 183                return;
 184        }
 185        clear_bit(TIMEOUT, &ep->com.flags);
 186        c4iw_get_ep(&ep->com);
 187        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
 188        add_timer(&ep->timer);
 189}
 190
 191static int stop_ep_timer(struct c4iw_ep *ep)
 192{
 193        pr_debug("ep %p stopping\n", ep);
 194        del_timer_sync(&ep->timer);
 195        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
 196                c4iw_put_ep(&ep->com);
 197                return 0;
 198        }
 199        return 1;
 200}
 201
 202static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
 203                  struct l2t_entry *l2e)
 204{
 205        int     error = 0;
 206
 207        if (c4iw_fatal_error(rdev)) {
 208                kfree_skb(skb);
 209                pr_err("%s - device in error state - dropping\n", __func__);
 210                return -EIO;
 211        }
 212        error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
 213        if (error < 0)
 214                kfree_skb(skb);
 215        else if (error == NET_XMIT_DROP)
 216                return -ENOMEM;
 217        return error < 0 ? error : 0;
 218}
 219
 220int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
 221{
 222        int     error = 0;
 223
 224        if (c4iw_fatal_error(rdev)) {
 225                kfree_skb(skb);
 226                pr_err("%s - device in error state - dropping\n", __func__);
 227                return -EIO;
 228        }
 229        error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
 230        if (error < 0)
 231                kfree_skb(skb);
 232        return error < 0 ? error : 0;
 233}
 234
 235static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 236{
 237        u32 len = roundup(sizeof(struct cpl_tid_release), 16);
 238
 239        skb = get_skb(skb, len, GFP_KERNEL);
 240        if (!skb)
 241                return;
 242
 243        cxgb_mk_tid_release(skb, len, hwtid, 0);
 244        c4iw_ofld_send(rdev, skb);
 245        return;
 246}
 247
 248static void set_emss(struct c4iw_ep *ep, u16 opt)
 249{
 250        ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
 251                   ((AF_INET == ep->com.remote_addr.ss_family) ?
 252                    sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
 253                   sizeof(struct tcphdr);
 254        ep->mss = ep->emss;
 255        if (TCPOPT_TSTAMP_G(opt))
 256                ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
 257        if (ep->emss < 128)
 258                ep->emss = 128;
 259        if (ep->emss & 7)
 260                pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
 261                         TCPOPT_MSS_G(opt), ep->mss, ep->emss);
 262        pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
 263                 ep->emss);
 264}
 265
 266static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
 267{
 268        enum c4iw_ep_state state;
 269
 270        mutex_lock(&epc->mutex);
 271        state = epc->state;
 272        mutex_unlock(&epc->mutex);
 273        return state;
 274}
 275
 276static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 277{
 278        epc->state = new;
 279}
 280
 281static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 282{
 283        mutex_lock(&epc->mutex);
 284        pr_debug("%s -> %s\n", states[epc->state], states[new]);
 285        __state_set(epc, new);
 286        mutex_unlock(&epc->mutex);
 287        return;
 288}
 289
 290static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
 291{
 292        struct sk_buff *skb;
 293        unsigned int i;
 294        size_t len;
 295
 296        len = roundup(sizeof(union cpl_wr_size), 16);
 297        for (i = 0; i < size; i++) {
 298                skb = alloc_skb(len, GFP_KERNEL);
 299                if (!skb)
 300                        goto fail;
 301                skb_queue_tail(ep_skb_list, skb);
 302        }
 303        return 0;
 304fail:
 305        skb_queue_purge(ep_skb_list);
 306        return -ENOMEM;
 307}
 308
 309static void *alloc_ep(int size, gfp_t gfp)
 310{
 311        struct c4iw_ep_common *epc;
 312
 313        epc = kzalloc(size, gfp);
 314        if (epc) {
 315                epc->wr_waitp = c4iw_alloc_wr_wait(gfp);
 316                if (!epc->wr_waitp) {
 317                        kfree(epc);
 318                        epc = NULL;
 319                        goto out;
 320                }
 321                kref_init(&epc->kref);
 322                mutex_init(&epc->mutex);
 323                c4iw_init_wr_wait(epc->wr_waitp);
 324        }
 325        pr_debug("alloc ep %p\n", epc);
 326out:
 327        return epc;
 328}
 329
 330static void remove_ep_tid(struct c4iw_ep *ep)
 331{
 332        unsigned long flags;
 333
 334        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 335        __xa_erase(&ep->com.dev->hwtids, ep->hwtid);
 336        if (xa_empty(&ep->com.dev->hwtids))
 337                wake_up(&ep->com.dev->wait);
 338        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 339}
 340
 341static int insert_ep_tid(struct c4iw_ep *ep)
 342{
 343        unsigned long flags;
 344        int err;
 345
 346        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 347        err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
 348        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 349
 350        return err;
 351}
 352
 353/*
 354 * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
 355 */
 356static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
 357{
 358        struct c4iw_ep *ep;
 359        unsigned long flags;
 360
 361        xa_lock_irqsave(&dev->hwtids, flags);
 362        ep = xa_load(&dev->hwtids, tid);
 363        if (ep)
 364                c4iw_get_ep(&ep->com);
 365        xa_unlock_irqrestore(&dev->hwtids, flags);
 366        return ep;
 367}
 368
 369/*
 370 * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
 371 */
 372static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
 373                                               unsigned int stid)
 374{
 375        struct c4iw_listen_ep *ep;
 376        unsigned long flags;
 377
 378        xa_lock_irqsave(&dev->stids, flags);
 379        ep = xa_load(&dev->stids, stid);
 380        if (ep)
 381                c4iw_get_ep(&ep->com);
 382        xa_unlock_irqrestore(&dev->stids, flags);
 383        return ep;
 384}
 385
 386void _c4iw_free_ep(struct kref *kref)
 387{
 388        struct c4iw_ep *ep;
 389
 390        ep = container_of(kref, struct c4iw_ep, com.kref);
 391        pr_debug("ep %p state %s\n", ep, states[ep->com.state]);
 392        if (test_bit(QP_REFERENCED, &ep->com.flags))
 393                deref_qp(ep);
 394        if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 395                if (ep->com.remote_addr.ss_family == AF_INET6) {
 396                        struct sockaddr_in6 *sin6 =
 397                                        (struct sockaddr_in6 *)
 398                                        &ep->com.local_addr;
 399
 400                        cxgb4_clip_release(
 401                                        ep->com.dev->rdev.lldi.ports[0],
 402                                        (const u32 *)&sin6->sin6_addr.s6_addr,
 403                                        1);
 404                }
 405                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
 406                                 ep->com.local_addr.ss_family);
 407                dst_release(ep->dst);
 408                cxgb4_l2t_release(ep->l2t);
 409                kfree_skb(ep->mpa_skb);
 410        }
 411        if (!skb_queue_empty(&ep->com.ep_skb_list))
 412                skb_queue_purge(&ep->com.ep_skb_list);
 413        c4iw_put_wr_wait(ep->com.wr_waitp);
 414        kfree(ep);
 415}
 416
 417static void release_ep_resources(struct c4iw_ep *ep)
 418{
 419        set_bit(RELEASE_RESOURCES, &ep->com.flags);
 420
 421        /*
 422         * If we have a hwtid, then remove it from the idr table
 423         * so lookups will no longer find this endpoint.  Otherwise
 424         * we have a race where one thread finds the ep ptr just
 425         * before the other thread is freeing the ep memory.
 426         */
 427        if (ep->hwtid != -1)
 428                remove_ep_tid(ep);
 429        c4iw_put_ep(&ep->com);
 430}
 431
 432static int status2errno(int status)
 433{
 434        switch (status) {
 435        case CPL_ERR_NONE:
 436                return 0;
 437        case CPL_ERR_CONN_RESET:
 438                return -ECONNRESET;
 439        case CPL_ERR_ARP_MISS:
 440                return -EHOSTUNREACH;
 441        case CPL_ERR_CONN_TIMEDOUT:
 442                return -ETIMEDOUT;
 443        case CPL_ERR_TCAM_FULL:
 444                return -ENOMEM;
 445        case CPL_ERR_CONN_EXIST:
 446                return -EADDRINUSE;
 447        default:
 448                return -EIO;
 449        }
 450}
 451
 452/*
 453 * Try and reuse skbs already allocated...
 454 */
 455static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 456{
 457        if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
 458                skb_trim(skb, 0);
 459                skb_get(skb);
 460                skb_reset_transport_header(skb);
 461        } else {
 462                skb = alloc_skb(len, gfp);
 463                if (!skb)
 464                        return NULL;
 465        }
 466        t4_set_arp_err_handler(skb, NULL, NULL);
 467        return skb;
 468}
 469
 470static struct net_device *get_real_dev(struct net_device *egress_dev)
 471{
 472        return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 473}
 474
 475static void arp_failure_discard(void *handle, struct sk_buff *skb)
 476{
 477        pr_err("ARP failure\n");
 478        kfree_skb(skb);
 479}
 480
 481static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
 482{
 483        pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
 484}
 485
 486enum {
 487        NUM_FAKE_CPLS = 2,
 488        FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
 489        FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
 490};
 491
 492static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 493{
 494        struct c4iw_ep *ep;
 495
 496        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 497        release_ep_resources(ep);
 498        kfree_skb(skb);
 499        return 0;
 500}
 501
 502static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 503{
 504        struct c4iw_ep *ep;
 505
 506        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 507        c4iw_put_ep(&ep->parent_ep->com);
 508        release_ep_resources(ep);
 509        kfree_skb(skb);
 510        return 0;
 511}
 512
 513/*
 514 * Fake up a special CPL opcode and call sched() so process_work() will call
 515 * _put_ep_safe() in a safe context to free the ep resources.  This is needed
 516 * because ARP error handlers are called in an ATOMIC context, and
 517 * _c4iw_free_ep() needs to block.
 518 */
 519static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
 520                                  int cpl)
 521{
 522        struct cpl_act_establish *rpl = cplhdr(skb);
 523
 524        /* Set our special ARP_FAILURE opcode */
 525        rpl->ot.opcode = cpl;
 526
 527        /*
 528         * Save ep in the skb->cb area, after where sched() will save the dev
 529         * ptr.
 530         */
 531        *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
 532        sched(ep->com.dev, skb);
 533}
 534
 535/* Handle an ARP failure for an accept */
 536static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
 537{
 538        struct c4iw_ep *ep = handle;
 539
 540        pr_err("ARP failure during accept - tid %u - dropping connection\n",
 541               ep->hwtid);
 542
 543        __state_set(&ep->com, DEAD);
 544        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
 545}
 546
 547/*
 548 * Handle an ARP failure for an active open.
 549 */
 550static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
 551{
 552        struct c4iw_ep *ep = handle;
 553
 554        pr_err("ARP failure during connect\n");
 555        connect_reply_upcall(ep, -EHOSTUNREACH);
 556        __state_set(&ep->com, DEAD);
 557        if (ep->com.remote_addr.ss_family == AF_INET6) {
 558                struct sockaddr_in6 *sin6 =
 559                        (struct sockaddr_in6 *)&ep->com.local_addr;
 560                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 561                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 562        }
 563        xa_erase_irq(&ep->com.dev->atids, ep->atid);
 564        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
 565        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 566}
 567
 568/*
 569 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 570 * and send it along.
 571 */
 572static void abort_arp_failure(void *handle, struct sk_buff *skb)
 573{
 574        int ret;
 575        struct c4iw_ep *ep = handle;
 576        struct c4iw_rdev *rdev = &ep->com.dev->rdev;
 577        struct cpl_abort_req *req = cplhdr(skb);
 578
 579        pr_debug("rdev %p\n", rdev);
 580        req->cmd = CPL_ABORT_NO_RST;
 581        skb_get(skb);
 582        ret = c4iw_ofld_send(rdev, skb);
 583        if (ret) {
 584                __state_set(&ep->com, DEAD);
 585                queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 586        } else
 587                kfree_skb(skb);
 588}
 589
 590static int send_flowc(struct c4iw_ep *ep)
 591{
 592        struct fw_flowc_wr *flowc;
 593        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 594        u16 vlan = ep->l2t->vlan;
 595        int nparams;
 596        int flowclen, flowclen16;
 597
 598        if (WARN_ON(!skb))
 599                return -ENOMEM;
 600
 601        if (vlan == CPL_L2T_VLAN_NONE)
 602                nparams = 9;
 603        else
 604                nparams = 10;
 605
 606        flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
 607        flowclen16 = DIV_ROUND_UP(flowclen, 16);
 608        flowclen = flowclen16 * 16;
 609
 610        flowc = __skb_put(skb, flowclen);
 611        memset(flowc, 0, flowclen);
 612
 613        flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
 614                                           FW_FLOWC_WR_NPARAMS_V(nparams));
 615        flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
 616                                          FW_WR_FLOWID_V(ep->hwtid));
 617
 618        flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 619        flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
 620                                            (ep->com.dev->rdev.lldi.pf));
 621        flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 622        flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
 623        flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 624        flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
 625        flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 626        flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
 627        flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
 628        flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
 629        flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
 630        flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
 631        flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
 632        flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
 633        flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 634        flowc->mnemval[7].val = cpu_to_be32(ep->emss);
 635        flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
 636        flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
 637        if (nparams == 10) {
 638                u16 pri;
 639                pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 640                flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
 641                flowc->mnemval[9].val = cpu_to_be32(pri);
 642        }
 643
 644        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 645        return c4iw_ofld_send(&ep->com.dev->rdev, skb);
 646}
 647
 648static int send_halfclose(struct c4iw_ep *ep)
 649{
 650        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 651        u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
 652
 653        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 654        if (WARN_ON(!skb))
 655                return -ENOMEM;
 656
 657        cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
 658                              NULL, arp_failure_discard);
 659
 660        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 661}
 662
 663static void read_tcb(struct c4iw_ep *ep)
 664{
 665        struct sk_buff *skb;
 666        struct cpl_get_tcb *req;
 667        int wrlen = roundup(sizeof(*req), 16);
 668
 669        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 670        if (WARN_ON(!skb))
 671                return;
 672
 673        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
 674        req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
 675        memset(req, 0, wrlen);
 676        INIT_TP_WR(req, ep->hwtid);
 677        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
 678        req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
 679
 680        /*
 681         * keep a ref on the ep so the tcb is not unlocked before this
 682         * cpl completes. The ref is released in read_tcb_rpl().
 683         */
 684        c4iw_get_ep(&ep->com);
 685        if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
 686                c4iw_put_ep(&ep->com);
 687}
 688
 689static int send_abort_req(struct c4iw_ep *ep)
 690{
 691        u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 692        struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
 693
 694        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 695        if (WARN_ON(!req_skb))
 696                return -ENOMEM;
 697
 698        cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
 699                          ep, abort_arp_failure);
 700
 701        return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 702}
 703
 704static int send_abort(struct c4iw_ep *ep)
 705{
 706        if (!ep->com.qp || !ep->com.qp->srq) {
 707                send_abort_req(ep);
 708                return 0;
 709        }
 710        set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
 711        read_tcb(ep);
 712        return 0;
 713}
 714
 715static int send_connect(struct c4iw_ep *ep)
 716{
 717        struct cpl_act_open_req *req = NULL;
 718        struct cpl_t5_act_open_req *t5req = NULL;
 719        struct cpl_t6_act_open_req *t6req = NULL;
 720        struct cpl_act_open_req6 *req6 = NULL;
 721        struct cpl_t5_act_open_req6 *t5req6 = NULL;
 722        struct cpl_t6_act_open_req6 *t6req6 = NULL;
 723        struct sk_buff *skb;
 724        u64 opt0;
 725        u32 opt2;
 726        unsigned int mtu_idx;
 727        u32 wscale;
 728        int win, sizev4, sizev6, wrlen;
 729        struct sockaddr_in *la = (struct sockaddr_in *)
 730                                 &ep->com.local_addr;
 731        struct sockaddr_in *ra = (struct sockaddr_in *)
 732                                 &ep->com.remote_addr;
 733        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
 734                                   &ep->com.local_addr;
 735        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
 736                                   &ep->com.remote_addr;
 737        int ret;
 738        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 739        u32 isn = (prandom_u32() & ~7UL) - 1;
 740        struct net_device *netdev;
 741        u64 params;
 742
 743        netdev = ep->com.dev->rdev.lldi.ports[0];
 744
 745        switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 746        case CHELSIO_T4:
 747                sizev4 = sizeof(struct cpl_act_open_req);
 748                sizev6 = sizeof(struct cpl_act_open_req6);
 749                break;
 750        case CHELSIO_T5:
 751                sizev4 = sizeof(struct cpl_t5_act_open_req);
 752                sizev6 = sizeof(struct cpl_t5_act_open_req6);
 753                break;
 754        case CHELSIO_T6:
 755                sizev4 = sizeof(struct cpl_t6_act_open_req);
 756                sizev6 = sizeof(struct cpl_t6_act_open_req6);
 757                break;
 758        default:
 759                pr_err("T%d Chip is not supported\n",
 760                       CHELSIO_CHIP_VERSION(adapter_type));
 761                return -EINVAL;
 762        }
 763
 764        wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
 765                        roundup(sizev4, 16) :
 766                        roundup(sizev6, 16);
 767
 768        pr_debug("ep %p atid %u\n", ep, ep->atid);
 769
 770        skb = get_skb(NULL, wrlen, GFP_KERNEL);
 771        if (!skb) {
 772                pr_err("%s - failed to alloc skb\n", __func__);
 773                return -ENOMEM;
 774        }
 775        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 776
 777        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 778                      enable_tcp_timestamps,
 779                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
 780        wscale = cxgb_compute_wscale(rcv_win);
 781
 782        /*
 783         * Specify the largest window that will fit in opt0. The
 784         * remainder will be specified in the rx_data_ack.
 785         */
 786        win = ep->rcv_win >> 10;
 787        if (win > RCV_BUFSIZ_M)
 788                win = RCV_BUFSIZ_M;
 789
 790        opt0 = (nocong ? NO_CONG_F : 0) |
 791               KEEP_ALIVE_F |
 792               DELACK_F |
 793               WND_SCALE_V(wscale) |
 794               MSS_IDX_V(mtu_idx) |
 795               L2T_IDX_V(ep->l2t->idx) |
 796               TX_CHAN_V(ep->tx_chan) |
 797               SMAC_SEL_V(ep->smac_idx) |
 798               DSCP_V(ep->tos >> 2) |
 799               ULP_MODE_V(ULP_MODE_TCPDDP) |
 800               RCV_BUFSIZ_V(win);
 801        opt2 = RX_CHANNEL_V(0) |
 802               CCTRL_ECN_V(enable_ecn) |
 803               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
 804        if (enable_tcp_timestamps)
 805                opt2 |= TSTAMPS_EN_F;
 806        if (enable_tcp_sack)
 807                opt2 |= SACK_EN_F;
 808        if (wscale && enable_tcp_window_scaling)
 809                opt2 |= WND_SCALE_EN_F;
 810        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
 811                if (peer2peer)
 812                        isn += 4;
 813
 814                opt2 |= T5_OPT_2_VALID_F;
 815                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
 816                opt2 |= T5_ISS_F;
 817        }
 818
 819        params = cxgb4_select_ntuple(netdev, ep->l2t);
 820
 821        if (ep->com.remote_addr.ss_family == AF_INET6)
 822                cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
 823                               (const u32 *)&la6->sin6_addr.s6_addr, 1);
 824
 825        t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
 826
 827        if (ep->com.remote_addr.ss_family == AF_INET) {
 828                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 829                case CHELSIO_T4:
 830                        req = skb_put(skb, wrlen);
 831                        INIT_TP_WR(req, 0);
 832                        break;
 833                case CHELSIO_T5:
 834                        t5req = skb_put(skb, wrlen);
 835                        INIT_TP_WR(t5req, 0);
 836                        req = (struct cpl_act_open_req *)t5req;
 837                        break;
 838                case CHELSIO_T6:
 839                        t6req = skb_put(skb, wrlen);
 840                        INIT_TP_WR(t6req, 0);
 841                        req = (struct cpl_act_open_req *)t6req;
 842                        t5req = (struct cpl_t5_act_open_req *)t6req;
 843                        break;
 844                default:
 845                        pr_err("T%d Chip is not supported\n",
 846                               CHELSIO_CHIP_VERSION(adapter_type));
 847                        ret = -EINVAL;
 848                        goto clip_release;
 849                }
 850
 851                OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
 852                                        ((ep->rss_qid<<14) | ep->atid)));
 853                req->local_port = la->sin_port;
 854                req->peer_port = ra->sin_port;
 855                req->local_ip = la->sin_addr.s_addr;
 856                req->peer_ip = ra->sin_addr.s_addr;
 857                req->opt0 = cpu_to_be64(opt0);
 858
 859                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 860                        req->params = cpu_to_be32(params);
 861                        req->opt2 = cpu_to_be32(opt2);
 862                } else {
 863                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 864                                t5req->params =
 865                                          cpu_to_be64(FILTER_TUPLE_V(params));
 866                                t5req->rsvd = cpu_to_be32(isn);
 867                                pr_debug("snd_isn %u\n", t5req->rsvd);
 868                                t5req->opt2 = cpu_to_be32(opt2);
 869                        } else {
 870                                t6req->params =
 871                                          cpu_to_be64(FILTER_TUPLE_V(params));
 872                                t6req->rsvd = cpu_to_be32(isn);
 873                                pr_debug("snd_isn %u\n", t6req->rsvd);
 874                                t6req->opt2 = cpu_to_be32(opt2);
 875                        }
 876                }
 877        } else {
 878                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 879                case CHELSIO_T4:
 880                        req6 = skb_put(skb, wrlen);
 881                        INIT_TP_WR(req6, 0);
 882                        break;
 883                case CHELSIO_T5:
 884                        t5req6 = skb_put(skb, wrlen);
 885                        INIT_TP_WR(t5req6, 0);
 886                        req6 = (struct cpl_act_open_req6 *)t5req6;
 887                        break;
 888                case CHELSIO_T6:
 889                        t6req6 = skb_put(skb, wrlen);
 890                        INIT_TP_WR(t6req6, 0);
 891                        req6 = (struct cpl_act_open_req6 *)t6req6;
 892                        t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
 893                        break;
 894                default:
 895                        pr_err("T%d Chip is not supported\n",
 896                               CHELSIO_CHIP_VERSION(adapter_type));
 897                        ret = -EINVAL;
 898                        goto clip_release;
 899                }
 900
 901                OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
 902                                        ((ep->rss_qid<<14)|ep->atid)));
 903                req6->local_port = la6->sin6_port;
 904                req6->peer_port = ra6->sin6_port;
 905                req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
 906                req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
 907                req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
 908                req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
 909                req6->opt0 = cpu_to_be64(opt0);
 910
 911                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 912                        req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
 913                                                                      ep->l2t));
 914                        req6->opt2 = cpu_to_be32(opt2);
 915                } else {
 916                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 917                                t5req6->params =
 918                                            cpu_to_be64(FILTER_TUPLE_V(params));
 919                                t5req6->rsvd = cpu_to_be32(isn);
 920                                pr_debug("snd_isn %u\n", t5req6->rsvd);
 921                                t5req6->opt2 = cpu_to_be32(opt2);
 922                        } else {
 923                                t6req6->params =
 924                                            cpu_to_be64(FILTER_TUPLE_V(params));
 925                                t6req6->rsvd = cpu_to_be32(isn);
 926                                pr_debug("snd_isn %u\n", t6req6->rsvd);
 927                                t6req6->opt2 = cpu_to_be32(opt2);
 928                        }
 929
 930                }
 931        }
 932
 933        set_bit(ACT_OPEN_REQ, &ep->com.history);
 934        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 935clip_release:
 936        if (ret && ep->com.remote_addr.ss_family == AF_INET6)
 937                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 938                                   (const u32 *)&la6->sin6_addr.s6_addr, 1);
 939        return ret;
 940}
 941
 942static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
 943                        u8 mpa_rev_to_use)
 944{
 945        int mpalen, wrlen, ret;
 946        struct fw_ofld_tx_data_wr *req;
 947        struct mpa_message *mpa;
 948        struct mpa_v2_conn_params mpa_v2_params;
 949
 950        pr_debug("ep %p tid %u pd_len %d\n",
 951                 ep, ep->hwtid, ep->plen);
 952
 953        mpalen = sizeof(*mpa) + ep->plen;
 954        if (mpa_rev_to_use == 2)
 955                mpalen += sizeof(struct mpa_v2_conn_params);
 956        wrlen = roundup(mpalen + sizeof(*req), 16);
 957        skb = get_skb(skb, wrlen, GFP_KERNEL);
 958        if (!skb) {
 959                connect_reply_upcall(ep, -ENOMEM);
 960                return -ENOMEM;
 961        }
 962        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 963
 964        req = skb_put_zero(skb, wrlen);
 965        req->op_to_immdlen = cpu_to_be32(
 966                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
 967                FW_WR_COMPL_F |
 968                FW_WR_IMMDLEN_V(mpalen));
 969        req->flowid_len16 = cpu_to_be32(
 970                FW_WR_FLOWID_V(ep->hwtid) |
 971                FW_WR_LEN16_V(wrlen >> 4));
 972        req->plen = cpu_to_be32(mpalen);
 973        req->tunnel_to_proxy = cpu_to_be32(
 974                FW_OFLD_TX_DATA_WR_FLUSH_F |
 975                FW_OFLD_TX_DATA_WR_SHOVE_F);
 976
 977        mpa = (struct mpa_message *)(req + 1);
 978        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 979
 980        mpa->flags = 0;
 981        if (crc_enabled)
 982                mpa->flags |= MPA_CRC;
 983        if (markers_enabled) {
 984                mpa->flags |= MPA_MARKERS;
 985                ep->mpa_attr.recv_marker_enabled = 1;
 986        } else {
 987                ep->mpa_attr.recv_marker_enabled = 0;
 988        }
 989        if (mpa_rev_to_use == 2)
 990                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
 991
 992        mpa->private_data_size = htons(ep->plen);
 993        mpa->revision = mpa_rev_to_use;
 994        if (mpa_rev_to_use == 1) {
 995                ep->tried_with_mpa_v1 = 1;
 996                ep->retry_with_mpa_v1 = 0;
 997        }
 998
 999        if (mpa_rev_to_use == 2) {
1000                mpa->private_data_size =
1001                        htons(ntohs(mpa->private_data_size) +
1002                              sizeof(struct mpa_v2_conn_params));
1003                pr_debug("initiator ird %u ord %u\n", ep->ird,
1004                         ep->ord);
1005                mpa_v2_params.ird = htons((u16)ep->ird);
1006                mpa_v2_params.ord = htons((u16)ep->ord);
1007
1008                if (peer2peer) {
1009                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1010                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1011                                mpa_v2_params.ord |=
1012                                        htons(MPA_V2_RDMA_WRITE_RTR);
1013                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1014                                mpa_v2_params.ord |=
1015                                        htons(MPA_V2_RDMA_READ_RTR);
1016                }
1017                memcpy(mpa->private_data, &mpa_v2_params,
1018                       sizeof(struct mpa_v2_conn_params));
1019
1020                if (ep->plen)
1021                        memcpy(mpa->private_data +
1022                               sizeof(struct mpa_v2_conn_params),
1023                               ep->mpa_pkt + sizeof(*mpa), ep->plen);
1024        } else
1025                if (ep->plen)
1026                        memcpy(mpa->private_data,
1027                                        ep->mpa_pkt + sizeof(*mpa), ep->plen);
1028
1029        /*
1030         * Reference the mpa skb.  This ensures the data area
1031         * will remain in memory until the hw acks the tx.
1032         * Function fw4_ack() will deref it.
1033         */
1034        skb_get(skb);
1035        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1036        ep->mpa_skb = skb;
1037        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1038        if (ret)
1039                return ret;
1040        start_ep_timer(ep);
1041        __state_set(&ep->com, MPA_REQ_SENT);
1042        ep->mpa_attr.initiator = 1;
1043        ep->snd_seq += mpalen;
1044        return ret;
1045}
1046
1047static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1048{
1049        int mpalen, wrlen;
1050        struct fw_ofld_tx_data_wr *req;
1051        struct mpa_message *mpa;
1052        struct sk_buff *skb;
1053        struct mpa_v2_conn_params mpa_v2_params;
1054
1055        pr_debug("ep %p tid %u pd_len %d\n",
1056                 ep, ep->hwtid, ep->plen);
1057
1058        mpalen = sizeof(*mpa) + plen;
1059        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1060                mpalen += sizeof(struct mpa_v2_conn_params);
1061        wrlen = roundup(mpalen + sizeof(*req), 16);
1062
1063        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1064        if (!skb) {
1065                pr_err("%s - cannot alloc skb!\n", __func__);
1066                return -ENOMEM;
1067        }
1068        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1069
1070        req = skb_put_zero(skb, wrlen);
1071        req->op_to_immdlen = cpu_to_be32(
1072                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1073                FW_WR_COMPL_F |
1074                FW_WR_IMMDLEN_V(mpalen));
1075        req->flowid_len16 = cpu_to_be32(
1076                FW_WR_FLOWID_V(ep->hwtid) |
1077                FW_WR_LEN16_V(wrlen >> 4));
1078        req->plen = cpu_to_be32(mpalen);
1079        req->tunnel_to_proxy = cpu_to_be32(
1080                FW_OFLD_TX_DATA_WR_FLUSH_F |
1081                FW_OFLD_TX_DATA_WR_SHOVE_F);
1082
1083        mpa = (struct mpa_message *)(req + 1);
1084        memset(mpa, 0, sizeof(*mpa));
1085        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1086        mpa->flags = MPA_REJECT;
1087        mpa->revision = ep->mpa_attr.version;
1088        mpa->private_data_size = htons(plen);
1089
1090        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1091                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1092                mpa->private_data_size =
1093                        htons(ntohs(mpa->private_data_size) +
1094                              sizeof(struct mpa_v2_conn_params));
1095                mpa_v2_params.ird = htons(((u16)ep->ird) |
1096                                          (peer2peer ? MPA_V2_PEER2PEER_MODEL :
1097                                           0));
1098                mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1099                                          (p2p_type ==
1100                                           FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1101                                           MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1102                                           FW_RI_INIT_P2PTYPE_READ_REQ ?
1103                                           MPA_V2_RDMA_READ_RTR : 0) : 0));
1104                memcpy(mpa->private_data, &mpa_v2_params,
1105                       sizeof(struct mpa_v2_conn_params));
1106
1107                if (ep->plen)
1108                        memcpy(mpa->private_data +
1109                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1110        } else
1111                if (plen)
1112                        memcpy(mpa->private_data, pdata, plen);
1113
1114        /*
1115         * Reference the mpa skb again.  This ensures the data area
1116         * will remain in memory until the hw acks the tx.
1117         * Function fw4_ack() will deref it.
1118         */
1119        skb_get(skb);
1120        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1121        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1122        ep->mpa_skb = skb;
1123        ep->snd_seq += mpalen;
1124        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1125}
1126
1127static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1128{
1129        int mpalen, wrlen;
1130        struct fw_ofld_tx_data_wr *req;
1131        struct mpa_message *mpa;
1132        struct sk_buff *skb;
1133        struct mpa_v2_conn_params mpa_v2_params;
1134
1135        pr_debug("ep %p tid %u pd_len %d\n",
1136                 ep, ep->hwtid, ep->plen);
1137
1138        mpalen = sizeof(*mpa) + plen;
1139        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1140                mpalen += sizeof(struct mpa_v2_conn_params);
1141        wrlen = roundup(mpalen + sizeof(*req), 16);
1142
1143        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1144        if (!skb) {
1145                pr_err("%s - cannot alloc skb!\n", __func__);
1146                return -ENOMEM;
1147        }
1148        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1149
1150        req = skb_put_zero(skb, wrlen);
1151        req->op_to_immdlen = cpu_to_be32(
1152                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1153                FW_WR_COMPL_F |
1154                FW_WR_IMMDLEN_V(mpalen));
1155        req->flowid_len16 = cpu_to_be32(
1156                FW_WR_FLOWID_V(ep->hwtid) |
1157                FW_WR_LEN16_V(wrlen >> 4));
1158        req->plen = cpu_to_be32(mpalen);
1159        req->tunnel_to_proxy = cpu_to_be32(
1160                FW_OFLD_TX_DATA_WR_FLUSH_F |
1161                FW_OFLD_TX_DATA_WR_SHOVE_F);
1162
1163        mpa = (struct mpa_message *)(req + 1);
1164        memset(mpa, 0, sizeof(*mpa));
1165        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1166        mpa->flags = 0;
1167        if (ep->mpa_attr.crc_enabled)
1168                mpa->flags |= MPA_CRC;
1169        if (ep->mpa_attr.recv_marker_enabled)
1170                mpa->flags |= MPA_MARKERS;
1171        mpa->revision = ep->mpa_attr.version;
1172        mpa->private_data_size = htons(plen);
1173
1174        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1175                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1176                mpa->private_data_size =
1177                        htons(ntohs(mpa->private_data_size) +
1178                              sizeof(struct mpa_v2_conn_params));
1179                mpa_v2_params.ird = htons((u16)ep->ird);
1180                mpa_v2_params.ord = htons((u16)ep->ord);
1181                if (peer2peer && (ep->mpa_attr.p2p_type !=
1182                                        FW_RI_INIT_P2PTYPE_DISABLED)) {
1183                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1184
1185                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1186                                mpa_v2_params.ord |=
1187                                        htons(MPA_V2_RDMA_WRITE_RTR);
1188                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1189                                mpa_v2_params.ord |=
1190                                        htons(MPA_V2_RDMA_READ_RTR);
1191                }
1192
1193                memcpy(mpa->private_data, &mpa_v2_params,
1194                       sizeof(struct mpa_v2_conn_params));
1195
1196                if (ep->plen)
1197                        memcpy(mpa->private_data +
1198                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1199        } else
1200                if (plen)
1201                        memcpy(mpa->private_data, pdata, plen);
1202
1203        /*
1204         * Reference the mpa skb.  This ensures the data area
1205         * will remain in memory until the hw acks the tx.
1206         * Function fw4_ack() will deref it.
1207         */
1208        skb_get(skb);
1209        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1210        ep->mpa_skb = skb;
1211        __state_set(&ep->com, MPA_REP_SENT);
1212        ep->snd_seq += mpalen;
1213        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1214}
1215
1216static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1217{
1218        struct c4iw_ep *ep;
1219        struct cpl_act_establish *req = cplhdr(skb);
1220        unsigned short tcp_opt = ntohs(req->tcp_opt);
1221        unsigned int tid = GET_TID(req);
1222        unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1223        struct tid_info *t = dev->rdev.lldi.tids;
1224        int ret;
1225
1226        ep = lookup_atid(t, atid);
1227
1228        pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
1229                 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1230
1231        mutex_lock(&ep->com.mutex);
1232        dst_confirm(ep->dst);
1233
1234        /* setup the hwtid for this connection */
1235        ep->hwtid = tid;
1236        cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
1237        insert_ep_tid(ep);
1238
1239        ep->snd_seq = be32_to_cpu(req->snd_isn);
1240        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1241        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
1242
1243        set_emss(ep, tcp_opt);
1244
1245        /* dealloc the atid */
1246        xa_erase_irq(&ep->com.dev->atids, atid);
1247        cxgb4_free_atid(t, atid);
1248        set_bit(ACT_ESTAB, &ep->com.history);
1249
1250        /* start MPA negotiation */
1251        ret = send_flowc(ep);
1252        if (ret)
1253                goto err;
1254        if (ep->retry_with_mpa_v1)
1255                ret = send_mpa_req(ep, skb, 1);
1256        else
1257                ret = send_mpa_req(ep, skb, mpa_rev);
1258        if (ret)
1259                goto err;
1260        mutex_unlock(&ep->com.mutex);
1261        return 0;
1262err:
1263        mutex_unlock(&ep->com.mutex);
1264        connect_reply_upcall(ep, -ENOMEM);
1265        c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1266        return 0;
1267}
1268
1269static void close_complete_upcall(struct c4iw_ep *ep, int status)
1270{
1271        struct iw_cm_event event;
1272
1273        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1274        memset(&event, 0, sizeof(event));
1275        event.event = IW_CM_EVENT_CLOSE;
1276        event.status = status;
1277        if (ep->com.cm_id) {
1278                pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
1279                         ep, ep->com.cm_id, ep->hwtid);
1280                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1281                deref_cm_id(&ep->com);
1282                set_bit(CLOSE_UPCALL, &ep->com.history);
1283        }
1284}
1285
1286static void peer_close_upcall(struct c4iw_ep *ep)
1287{
1288        struct iw_cm_event event;
1289
1290        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1291        memset(&event, 0, sizeof(event));
1292        event.event = IW_CM_EVENT_DISCONNECT;
1293        if (ep->com.cm_id) {
1294                pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
1295                         ep, ep->com.cm_id, ep->hwtid);
1296                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1297                set_bit(DISCONN_UPCALL, &ep->com.history);
1298        }
1299}
1300
1301static void peer_abort_upcall(struct c4iw_ep *ep)
1302{
1303        struct iw_cm_event event;
1304
1305        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1306        memset(&event, 0, sizeof(event));
1307        event.event = IW_CM_EVENT_CLOSE;
1308        event.status = -ECONNRESET;
1309        if (ep->com.cm_id) {
1310                pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
1311                         ep->com.cm_id, ep->hwtid);
1312                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1313                deref_cm_id(&ep->com);
1314                set_bit(ABORT_UPCALL, &ep->com.history);
1315        }
1316}
1317
1318static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1319{
1320        struct iw_cm_event event;
1321
1322        pr_debug("ep %p tid %u status %d\n",
1323                 ep, ep->hwtid, status);
1324        memset(&event, 0, sizeof(event));
1325        event.event = IW_CM_EVENT_CONNECT_REPLY;
1326        event.status = status;
1327        memcpy(&event.local_addr, &ep->com.local_addr,
1328               sizeof(ep->com.local_addr));
1329        memcpy(&event.remote_addr, &ep->com.remote_addr,
1330               sizeof(ep->com.remote_addr));
1331
1332        if ((status == 0) || (status == -ECONNREFUSED)) {
1333                if (!ep->tried_with_mpa_v1) {
1334                        /* this means MPA_v2 is used */
1335                        event.ord = ep->ird;
1336                        event.ird = ep->ord;
1337                        event.private_data_len = ep->plen -
1338                                sizeof(struct mpa_v2_conn_params);
1339                        event.private_data = ep->mpa_pkt +
1340                                sizeof(struct mpa_message) +
1341                                sizeof(struct mpa_v2_conn_params);
1342                } else {
1343                        /* this means MPA_v1 is used */
1344                        event.ord = cur_max_read_depth(ep->com.dev);
1345                        event.ird = cur_max_read_depth(ep->com.dev);
1346                        event.private_data_len = ep->plen;
1347                        event.private_data = ep->mpa_pkt +
1348                                sizeof(struct mpa_message);
1349                }
1350        }
1351
1352        pr_debug("ep %p tid %u status %d\n", ep,
1353                 ep->hwtid, status);
1354        set_bit(CONN_RPL_UPCALL, &ep->com.history);
1355        ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1356
1357        if (status < 0)
1358                deref_cm_id(&ep->com);
1359}
1360
1361static int connect_request_upcall(struct c4iw_ep *ep)
1362{
1363        struct iw_cm_event event;
1364        int ret;
1365
1366        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1367        memset(&event, 0, sizeof(event));
1368        event.event = IW_CM_EVENT_CONNECT_REQUEST;
1369        memcpy(&event.local_addr, &ep->com.local_addr,
1370               sizeof(ep->com.local_addr));
1371        memcpy(&event.remote_addr, &ep->com.remote_addr,
1372               sizeof(ep->com.remote_addr));
1373        event.provider_data = ep;
1374        if (!ep->tried_with_mpa_v1) {
1375                /* this means MPA_v2 is used */
1376                event.ord = ep->ord;
1377                event.ird = ep->ird;
1378                event.private_data_len = ep->plen -
1379                        sizeof(struct mpa_v2_conn_params);
1380                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1381                        sizeof(struct mpa_v2_conn_params);
1382        } else {
1383                /* this means MPA_v1 is used. Send max supported */
1384                event.ord = cur_max_read_depth(ep->com.dev);
1385                event.ird = cur_max_read_depth(ep->com.dev);
1386                event.private_data_len = ep->plen;
1387                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1388        }
1389        c4iw_get_ep(&ep->com);
1390        ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1391                                                      &event);
1392        if (ret)
1393                c4iw_put_ep(&ep->com);
1394        set_bit(CONNREQ_UPCALL, &ep->com.history);
1395        c4iw_put_ep(&ep->parent_ep->com);
1396        return ret;
1397}
1398
1399static void established_upcall(struct c4iw_ep *ep)
1400{
1401        struct iw_cm_event event;
1402
1403        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1404        memset(&event, 0, sizeof(event));
1405        event.event = IW_CM_EVENT_ESTABLISHED;
1406        event.ird = ep->ord;
1407        event.ord = ep->ird;
1408        if (ep->com.cm_id) {
1409                pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1410                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1411                set_bit(ESTAB_UPCALL, &ep->com.history);
1412        }
1413}
1414
1415static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1416{
1417        struct sk_buff *skb;
1418        u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
1419        u32 credit_dack;
1420
1421        pr_debug("ep %p tid %u credits %u\n",
1422                 ep, ep->hwtid, credits);
1423        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1424        if (!skb) {
1425                pr_err("update_rx_credits - cannot alloc skb!\n");
1426                return 0;
1427        }
1428
1429        /*
1430         * If we couldn't specify the entire rcv window at connection setup
1431         * due to the limit in the number of bits in the RCV_BUFSIZ field,
1432         * then add the overage in to the credits returned.
1433         */
1434        if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1435                credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
1436
1437        credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
1438                      RX_DACK_MODE_V(dack_mode);
1439
1440        cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
1441                            credit_dack);
1442
1443        c4iw_ofld_send(&ep->com.dev->rdev, skb);
1444        return credits;
1445}
1446
1447#define RELAXED_IRD_NEGOTIATION 1
1448
1449/*
1450 * process_mpa_reply - process streaming mode MPA reply
1451 *
1452 * Returns:
1453 *
1454 * 0 upon success indicating a connect request was delivered to the ULP
1455 * or the mpa request is incomplete but valid so far.
1456 *
1457 * 1 if a failure requires the caller to close the connection.
1458 *
1459 * 2 if a failure requires the caller to abort the connection.
1460 */
1461static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1462{
1463        struct mpa_message *mpa;
1464        struct mpa_v2_conn_params *mpa_v2_params;
1465        u16 plen;
1466        u16 resp_ird, resp_ord;
1467        u8 rtr_mismatch = 0, insuff_ird = 0;
1468        struct c4iw_qp_attributes attrs;
1469        enum c4iw_qp_attr_mask mask;
1470        int err;
1471        int disconnect = 0;
1472
1473        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1474
1475        /*
1476         * If we get more than the supported amount of private data
1477         * then we must fail this connection.
1478         */
1479        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1480                err = -EINVAL;
1481                goto err_stop_timer;
1482        }
1483
1484        /*
1485         * copy the new data into our accumulation buffer.
1486         */
1487        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1488                                  skb->len);
1489        ep->mpa_pkt_len += skb->len;
1490
1491        /*
1492         * if we don't even have the mpa message, then bail.
1493         */
1494        if (ep->mpa_pkt_len < sizeof(*mpa))
1495                return 0;
1496        mpa = (struct mpa_message *) ep->mpa_pkt;
1497
1498        /* Validate MPA header. */
1499        if (mpa->revision > mpa_rev) {
1500                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1501                       __func__, mpa_rev, mpa->revision);
1502                err = -EPROTO;
1503                goto err_stop_timer;
1504        }
1505        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1506                err = -EPROTO;
1507                goto err_stop_timer;
1508        }
1509
1510        plen = ntohs(mpa->private_data_size);
1511
1512        /*
1513         * Fail if there's too much private data.
1514         */
1515        if (plen > MPA_MAX_PRIVATE_DATA) {
1516                err = -EPROTO;
1517                goto err_stop_timer;
1518        }
1519
1520        /*
1521         * If plen does not account for pkt size
1522         */
1523        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1524                err = -EPROTO;
1525                goto err_stop_timer;
1526        }
1527
1528        ep->plen = (u8) plen;
1529
1530        /*
1531         * If we don't have all the pdata yet, then bail.
1532         * We'll continue process when more data arrives.
1533         */
1534        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1535                return 0;
1536
1537        if (mpa->flags & MPA_REJECT) {
1538                err = -ECONNREFUSED;
1539                goto err_stop_timer;
1540        }
1541
1542        /*
1543         * Stop mpa timer.  If it expired, then
1544         * we ignore the MPA reply.  process_timeout()
1545         * will abort the connection.
1546         */
1547        if (stop_ep_timer(ep))
1548                return 0;
1549
1550        /*
1551         * If we get here we have accumulated the entire mpa
1552         * start reply message including private data. And
1553         * the MPA header is valid.
1554         */
1555        __state_set(&ep->com, FPDU_MODE);
1556        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1557        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1558        ep->mpa_attr.version = mpa->revision;
1559        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1560
1561        if (mpa->revision == 2) {
1562                ep->mpa_attr.enhanced_rdma_conn =
1563                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1564                if (ep->mpa_attr.enhanced_rdma_conn) {
1565                        mpa_v2_params = (struct mpa_v2_conn_params *)
1566                                (ep->mpa_pkt + sizeof(*mpa));
1567                        resp_ird = ntohs(mpa_v2_params->ird) &
1568                                MPA_V2_IRD_ORD_MASK;
1569                        resp_ord = ntohs(mpa_v2_params->ord) &
1570                                MPA_V2_IRD_ORD_MASK;
1571                        pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
1572                                 resp_ird, resp_ord, ep->ird, ep->ord);
1573
1574                        /*
1575                         * This is a double-check. Ideally, below checks are
1576                         * not required since ird/ord stuff has been taken
1577                         * care of in c4iw_accept_cr
1578                         */
1579                        if (ep->ird < resp_ord) {
1580                                if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1581                                    ep->com.dev->rdev.lldi.max_ordird_qp)
1582                                        ep->ird = resp_ord;
1583                                else
1584                                        insuff_ird = 1;
1585                        } else if (ep->ird > resp_ord) {
1586                                ep->ird = resp_ord;
1587                        }
1588                        if (ep->ord > resp_ird) {
1589                                if (RELAXED_IRD_NEGOTIATION)
1590                                        ep->ord = resp_ird;
1591                                else
1592                                        insuff_ird = 1;
1593                        }
1594                        if (insuff_ird) {
1595                                err = -ENOMEM;
1596                                ep->ird = resp_ord;
1597                                ep->ord = resp_ird;
1598                        }
1599
1600                        if (ntohs(mpa_v2_params->ird) &
1601                                        MPA_V2_PEER2PEER_MODEL) {
1602                                if (ntohs(mpa_v2_params->ord) &
1603                                                MPA_V2_RDMA_WRITE_RTR)
1604                                        ep->mpa_attr.p2p_type =
1605                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1606                                else if (ntohs(mpa_v2_params->ord) &
1607                                                MPA_V2_RDMA_READ_RTR)
1608                                        ep->mpa_attr.p2p_type =
1609                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1610                        }
1611                }
1612        } else if (mpa->revision == 1)
1613                if (peer2peer)
1614                        ep->mpa_attr.p2p_type = p2p_type;
1615
1616        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
1617                 ep->mpa_attr.crc_enabled,
1618                 ep->mpa_attr.recv_marker_enabled,
1619                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1620                 ep->mpa_attr.p2p_type, p2p_type);
1621
1622        /*
1623         * If responder's RTR does not match with that of initiator, assign
1624         * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1625         * generated when moving QP to RTS state.
1626         * A TERM message will be sent after QP has moved to RTS state
1627         */
1628        if ((ep->mpa_attr.version == 2) && peer2peer &&
1629                        (ep->mpa_attr.p2p_type != p2p_type)) {
1630                ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1631                rtr_mismatch = 1;
1632        }
1633
1634        attrs.mpa_attr = ep->mpa_attr;
1635        attrs.max_ird = ep->ird;
1636        attrs.max_ord = ep->ord;
1637        attrs.llp_stream_handle = ep;
1638        attrs.next_state = C4IW_QP_STATE_RTS;
1639
1640        mask = C4IW_QP_ATTR_NEXT_STATE |
1641            C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1642            C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1643
1644        /* bind QP and TID with INIT_WR */
1645        err = c4iw_modify_qp(ep->com.qp->rhp,
1646                             ep->com.qp, mask, &attrs, 1);
1647        if (err)
1648                goto err;
1649
1650        /*
1651         * If responder's RTR requirement did not match with what initiator
1652         * supports, generate TERM message
1653         */
1654        if (rtr_mismatch) {
1655                pr_err("%s: RTR mismatch, sending TERM\n", __func__);
1656                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1657                attrs.ecode = MPA_NOMATCH_RTR;
1658                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1659                attrs.send_term = 1;
1660                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1661                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1662                err = -ENOMEM;
1663                disconnect = 1;
1664                goto out;
1665        }
1666
1667        /*
1668         * Generate TERM if initiator IRD is not sufficient for responder
1669         * provided ORD. Currently, we do the same behaviour even when
1670         * responder provided IRD is also not sufficient as regards to
1671         * initiator ORD.
1672         */
1673        if (insuff_ird) {
1674                pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
1675                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1676                attrs.ecode = MPA_INSUFF_IRD;
1677                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1678                attrs.send_term = 1;
1679                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1680                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1681                err = -ENOMEM;
1682                disconnect = 1;
1683                goto out;
1684        }
1685        goto out;
1686err_stop_timer:
1687        stop_ep_timer(ep);
1688err:
1689        disconnect = 2;
1690out:
1691        connect_reply_upcall(ep, err);
1692        return disconnect;
1693}
1694
1695/*
1696 * process_mpa_request - process streaming mode MPA request
1697 *
1698 * Returns:
1699 *
1700 * 0 upon success indicating a connect request was delivered to the ULP
1701 * or the mpa request is incomplete but valid so far.
1702 *
1703 * 1 if a failure requires the caller to close the connection.
1704 *
1705 * 2 if a failure requires the caller to abort the connection.
1706 */
1707static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1708{
1709        struct mpa_message *mpa;
1710        struct mpa_v2_conn_params *mpa_v2_params;
1711        u16 plen;
1712
1713        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1714
1715        /*
1716         * If we get more than the supported amount of private data
1717         * then we must fail this connection.
1718         */
1719        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
1720                goto err_stop_timer;
1721
1722        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1723
1724        /*
1725         * Copy the new data into our accumulation buffer.
1726         */
1727        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1728                                  skb->len);
1729        ep->mpa_pkt_len += skb->len;
1730
1731        /*
1732         * If we don't even have the mpa message, then bail.
1733         * We'll continue process when more data arrives.
1734         */
1735        if (ep->mpa_pkt_len < sizeof(*mpa))
1736                return 0;
1737
1738        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1739        mpa = (struct mpa_message *) ep->mpa_pkt;
1740
1741        /*
1742         * Validate MPA Header.
1743         */
1744        if (mpa->revision > mpa_rev) {
1745                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1746                       __func__, mpa_rev, mpa->revision);
1747                goto err_stop_timer;
1748        }
1749
1750        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1751                goto err_stop_timer;
1752
1753        plen = ntohs(mpa->private_data_size);
1754
1755        /*
1756         * Fail if there's too much private data.
1757         */
1758        if (plen > MPA_MAX_PRIVATE_DATA)
1759                goto err_stop_timer;
1760
1761        /*
1762         * If plen does not account for pkt size
1763         */
1764        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1765                goto err_stop_timer;
1766        ep->plen = (u8) plen;
1767
1768        /*
1769         * If we don't have all the pdata yet, then bail.
1770         */
1771        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1772                return 0;
1773
1774        /*
1775         * If we get here we have accumulated the entire mpa
1776         * start reply message including private data.
1777         */
1778        ep->mpa_attr.initiator = 0;
1779        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1780        ep->mpa_attr.recv_marker_enabled = markers_enabled;
1781        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1782        ep->mpa_attr.version = mpa->revision;
1783        if (mpa->revision == 1)
1784                ep->tried_with_mpa_v1 = 1;
1785        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1786
1787        if (mpa->revision == 2) {
1788                ep->mpa_attr.enhanced_rdma_conn =
1789                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1790                if (ep->mpa_attr.enhanced_rdma_conn) {
1791                        mpa_v2_params = (struct mpa_v2_conn_params *)
1792                                (ep->mpa_pkt + sizeof(*mpa));
1793                        ep->ird = ntohs(mpa_v2_params->ird) &
1794                                MPA_V2_IRD_ORD_MASK;
1795                        ep->ird = min_t(u32, ep->ird,
1796                                        cur_max_read_depth(ep->com.dev));
1797                        ep->ord = ntohs(mpa_v2_params->ord) &
1798                                MPA_V2_IRD_ORD_MASK;
1799                        ep->ord = min_t(u32, ep->ord,
1800                                        cur_max_read_depth(ep->com.dev));
1801                        pr_debug("initiator ird %u ord %u\n",
1802                                 ep->ird, ep->ord);
1803                        if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1804                                if (peer2peer) {
1805                                        if (ntohs(mpa_v2_params->ord) &
1806                                                        MPA_V2_RDMA_WRITE_RTR)
1807                                                ep->mpa_attr.p2p_type =
1808                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1809                                        else if (ntohs(mpa_v2_params->ord) &
1810                                                        MPA_V2_RDMA_READ_RTR)
1811                                                ep->mpa_attr.p2p_type =
1812                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1813                                }
1814                }
1815        } else if (mpa->revision == 1)
1816                if (peer2peer)
1817                        ep->mpa_attr.p2p_type = p2p_type;
1818
1819        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
1820                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1821                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1822                 ep->mpa_attr.p2p_type);
1823
1824        __state_set(&ep->com, MPA_REQ_RCVD);
1825
1826        /* drive upcall */
1827        mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
1828        if (ep->parent_ep->com.state != DEAD) {
1829                if (connect_request_upcall(ep))
1830                        goto err_unlock_parent;
1831        } else {
1832                goto err_unlock_parent;
1833        }
1834        mutex_unlock(&ep->parent_ep->com.mutex);
1835        return 0;
1836
1837err_unlock_parent:
1838        mutex_unlock(&ep->parent_ep->com.mutex);
1839        goto err_out;
1840err_stop_timer:
1841        (void)stop_ep_timer(ep);
1842err_out:
1843        return 2;
1844}
1845
1846static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1847{
1848        struct c4iw_ep *ep;
1849        struct cpl_rx_data *hdr = cplhdr(skb);
1850        unsigned int dlen = ntohs(hdr->len);
1851        unsigned int tid = GET_TID(hdr);
1852        __u8 status = hdr->status;
1853        int disconnect = 0;
1854
1855        ep = get_ep_from_tid(dev, tid);
1856        if (!ep)
1857                return 0;
1858        pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
1859        skb_pull(skb, sizeof(*hdr));
1860        skb_trim(skb, dlen);
1861        mutex_lock(&ep->com.mutex);
1862
1863        switch (ep->com.state) {
1864        case MPA_REQ_SENT:
1865                update_rx_credits(ep, dlen);
1866                ep->rcv_seq += dlen;
1867                disconnect = process_mpa_reply(ep, skb);
1868                break;
1869        case MPA_REQ_WAIT:
1870                update_rx_credits(ep, dlen);
1871                ep->rcv_seq += dlen;
1872                disconnect = process_mpa_request(ep, skb);
1873                break;
1874        case FPDU_MODE: {
1875                struct c4iw_qp_attributes attrs;
1876
1877                update_rx_credits(ep, dlen);
1878                if (status)
1879                        pr_err("%s Unexpected streaming data." \
1880                               " qpid %u ep %p state %d tid %u status %d\n",
1881                               __func__, ep->com.qp->wq.sq.qid, ep,
1882                               ep->com.state, ep->hwtid, status);
1883                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1884                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1885                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1886                disconnect = 1;
1887                break;
1888        }
1889        default:
1890                break;
1891        }
1892        mutex_unlock(&ep->com.mutex);
1893        if (disconnect)
1894                c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
1895        c4iw_put_ep(&ep->com);
1896        return 0;
1897}
1898
1899static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
1900{
1901        enum chip_type adapter_type;
1902
1903        adapter_type = ep->com.dev->rdev.lldi.adapter_type;
1904
1905        /*
1906         * If this TCB had a srq buffer cached, then we must complete
1907         * it. For user mode, that means saving the srqidx in the
1908         * user/kernel status page for this qp.  For kernel mode, just
1909         * synthesize the CQE now.
1910         */
1911        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
1912                if (ep->com.qp->ibqp.uobject)
1913                        t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
1914                else
1915                        c4iw_flush_srqidx(ep->com.qp, srqidx);
1916        }
1917}
1918
1919static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1920{
1921        u32 srqidx;
1922        struct c4iw_ep *ep;
1923        struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
1924        int release = 0;
1925        unsigned int tid = GET_TID(rpl);
1926
1927        ep = get_ep_from_tid(dev, tid);
1928        if (!ep) {
1929                pr_warn("Abort rpl to freed endpoint\n");
1930                return 0;
1931        }
1932
1933        if (ep->com.qp && ep->com.qp->srq) {
1934                srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
1935                complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
1936        }
1937
1938        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1939        mutex_lock(&ep->com.mutex);
1940        switch (ep->com.state) {
1941        case ABORTING:
1942                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
1943                __state_set(&ep->com, DEAD);
1944                release = 1;
1945                break;
1946        default:
1947                pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
1948                break;
1949        }
1950        mutex_unlock(&ep->com.mutex);
1951
1952        if (release) {
1953                close_complete_upcall(ep, -ECONNRESET);
1954                release_ep_resources(ep);
1955        }
1956        c4iw_put_ep(&ep->com);
1957        return 0;
1958}
1959
1960static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1961{
1962        struct sk_buff *skb;
1963        struct fw_ofld_connection_wr *req;
1964        unsigned int mtu_idx;
1965        u32 wscale;
1966        struct sockaddr_in *sin;
1967        int win;
1968
1969        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1970        req = __skb_put_zero(skb, sizeof(*req));
1971        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1972        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1973        req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1974                                     ep->com.dev->rdev.lldi.ports[0],
1975                                     ep->l2t));
1976        sin = (struct sockaddr_in *)&ep->com.local_addr;
1977        req->le.lport = sin->sin_port;
1978        req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1979        sin = (struct sockaddr_in *)&ep->com.remote_addr;
1980        req->le.pport = sin->sin_port;
1981        req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1982        req->tcb.t_state_to_astid =
1983                        htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1984                        FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1985        req->tcb.cplrxdataack_cplpassacceptrpl =
1986                        htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1987        req->tcb.tx_max = (__force __be32) jiffies;
1988        req->tcb.rcv_adv = htons(1);
1989        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1990                      enable_tcp_timestamps,
1991                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
1992        wscale = cxgb_compute_wscale(rcv_win);
1993
1994        /*
1995         * Specify the largest window that will fit in opt0. The
1996         * remainder will be specified in the rx_data_ack.
1997         */
1998        win = ep->rcv_win >> 10;
1999        if (win > RCV_BUFSIZ_M)
2000                win = RCV_BUFSIZ_M;
2001
2002        req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
2003                (nocong ? NO_CONG_F : 0) |
2004                KEEP_ALIVE_F |
2005                DELACK_F |
2006                WND_SCALE_V(wscale) |
2007                MSS_IDX_V(mtu_idx) |
2008                L2T_IDX_V(ep->l2t->idx) |
2009                TX_CHAN_V(ep->tx_chan) |
2010                SMAC_SEL_V(ep->smac_idx) |
2011                DSCP_V(ep->tos >> 2) |
2012                ULP_MODE_V(ULP_MODE_TCPDDP) |
2013                RCV_BUFSIZ_V(win));
2014        req->tcb.opt2 = (__force __be32) (PACE_V(1) |
2015                TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
2016                RX_CHANNEL_V(0) |
2017                CCTRL_ECN_V(enable_ecn) |
2018                RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
2019        if (enable_tcp_timestamps)
2020                req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
2021        if (enable_tcp_sack)
2022                req->tcb.opt2 |= (__force __be32)SACK_EN_F;
2023        if (wscale && enable_tcp_window_scaling)
2024                req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
2025        req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
2026        req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
2027        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
2028        set_bit(ACT_OFLD_CONN, &ep->com.history);
2029        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2030}
2031
2032/*
2033 * Some of the error codes above implicitly indicate that there is no TID
2034 * allocated with the result of an ACT_OPEN.  We use this predicate to make
2035 * that explicit.
2036 */
2037static inline int act_open_has_tid(int status)
2038{
2039        return (status != CPL_ERR_TCAM_PARITY &&
2040                status != CPL_ERR_TCAM_MISS &&
2041                status != CPL_ERR_TCAM_FULL &&
2042                status != CPL_ERR_CONN_EXIST_SYNRECV &&
2043                status != CPL_ERR_CONN_EXIST);
2044}
2045
2046static char *neg_adv_str(unsigned int status)
2047{
2048        switch (status) {
2049        case CPL_ERR_RTX_NEG_ADVICE:
2050                return "Retransmit timeout";
2051        case CPL_ERR_PERSIST_NEG_ADVICE:
2052                return "Persist timeout";
2053        case CPL_ERR_KEEPALV_NEG_ADVICE:
2054                return "Keepalive timeout";
2055        default:
2056                return "Unknown";
2057        }
2058}
2059
2060static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
2061{
2062        ep->snd_win = snd_win;
2063        ep->rcv_win = rcv_win;
2064        pr_debug("snd_win %d rcv_win %d\n",
2065                 ep->snd_win, ep->rcv_win);
2066}
2067
2068#define ACT_OPEN_RETRY_COUNT 2
2069
2070static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
2071                     struct dst_entry *dst, struct c4iw_dev *cdev,
2072                     bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
2073{
2074        struct neighbour *n;
2075        int err, step;
2076        struct net_device *pdev;
2077
2078        n = dst_neigh_lookup(dst, peer_ip);
2079        if (!n)
2080                return -ENODEV;
2081
2082        rcu_read_lock();
2083        err = -ENOMEM;
2084        if (n->dev->flags & IFF_LOOPBACK) {
2085                if (iptype == 4)
2086                        pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
2087                else if (IS_ENABLED(CONFIG_IPV6))
2088                        for_each_netdev(&init_net, pdev) {
2089                                if (ipv6_chk_addr(&init_net,
2090                                                  (struct in6_addr *)peer_ip,
2091                                                  pdev, 1))
2092                                        break;
2093                        }
2094                else
2095                        pdev = NULL;
2096
2097                if (!pdev) {
2098                        err = -ENODEV;
2099                        goto out;
2100                }
2101                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2102                                        n, pdev, rt_tos2priority(tos));
2103                if (!ep->l2t) {
2104                        dev_put(pdev);
2105                        goto out;
2106                }
2107                ep->mtu = pdev->mtu;
2108                ep->tx_chan = cxgb4_port_chan(pdev);
2109                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2110                step = cdev->rdev.lldi.ntxq /
2111                        cdev->rdev.lldi.nchan;
2112                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2113                step = cdev->rdev.lldi.nrxq /
2114                        cdev->rdev.lldi.nchan;
2115                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2116                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2117                        cxgb4_port_idx(pdev) * step];
2118                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2119                dev_put(pdev);
2120        } else {
2121                pdev = get_real_dev(n->dev);
2122                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2123                                        n, pdev, rt_tos2priority(tos));
2124                if (!ep->l2t)
2125                        goto out;
2126                ep->mtu = dst_mtu(dst);
2127                ep->tx_chan = cxgb4_port_chan(pdev);
2128                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2129                step = cdev->rdev.lldi.ntxq /
2130                        cdev->rdev.lldi.nchan;
2131                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2132                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2133                step = cdev->rdev.lldi.nrxq /
2134                        cdev->rdev.lldi.nchan;
2135                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2136                        cxgb4_port_idx(pdev) * step];
2137                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2138
2139                if (clear_mpa_v1) {
2140                        ep->retry_with_mpa_v1 = 0;
2141                        ep->tried_with_mpa_v1 = 0;
2142                }
2143        }
2144        err = 0;
2145out:
2146        rcu_read_unlock();
2147
2148        neigh_release(n);
2149
2150        return err;
2151}
2152
2153static int c4iw_reconnect(struct c4iw_ep *ep)
2154{
2155        int err = 0;
2156        int size = 0;
2157        struct sockaddr_in *laddr = (struct sockaddr_in *)
2158                                    &ep->com.cm_id->m_local_addr;
2159        struct sockaddr_in *raddr = (struct sockaddr_in *)
2160                                    &ep->com.cm_id->m_remote_addr;
2161        struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
2162                                      &ep->com.cm_id->m_local_addr;
2163        struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
2164                                      &ep->com.cm_id->m_remote_addr;
2165        int iptype;
2166        __u8 *ra;
2167
2168        pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id);
2169        c4iw_init_wr_wait(ep->com.wr_waitp);
2170
2171        /* When MPA revision is different on nodes, the node with MPA_rev=2
2172         * tries to reconnect with MPA_rev 1 for the same EP through
2173         * c4iw_reconnect(), where the same EP is assigned with new tid for
2174         * further connection establishment. As we are using the same EP pointer
2175         * for reconnect, few skbs are used during the previous c4iw_connect(),
2176         * which leaves the EP with inadequate skbs for further
2177         * c4iw_reconnect(), Further causing a crash due to an empty
2178         * skb_list() during peer_abort(). Allocate skbs which is already used.
2179         */
2180        size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
2181        if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
2182                err = -ENOMEM;
2183                goto fail1;
2184        }
2185
2186        /*
2187         * Allocate an active TID to initiate a TCP connection.
2188         */
2189        ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
2190        if (ep->atid == -1) {
2191                pr_err("%s - cannot alloc atid\n", __func__);
2192                err = -ENOMEM;
2193                goto fail2;
2194        }
2195        err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
2196        if (err)
2197                goto fail2a;
2198
2199        /* find a route */
2200        if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2201                ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
2202                                          laddr->sin_addr.s_addr,
2203                                          raddr->sin_addr.s_addr,
2204                                          laddr->sin_port,
2205                                          raddr->sin_port, ep->com.cm_id->tos);
2206                iptype = 4;
2207                ra = (__u8 *)&raddr->sin_addr;
2208        } else {
2209                ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
2210                                           get_real_dev,
2211                                           laddr6->sin6_addr.s6_addr,
2212                                           raddr6->sin6_addr.s6_addr,
2213                                           laddr6->sin6_port,
2214                                           raddr6->sin6_port,
2215                                           ep->com.cm_id->tos,
2216                                           raddr6->sin6_scope_id);
2217                iptype = 6;
2218                ra = (__u8 *)&raddr6->sin6_addr;
2219        }
2220        if (!ep->dst) {
2221                pr_err("%s - cannot find route\n", __func__);
2222                err = -EHOSTUNREACH;
2223                goto fail3;
2224        }
2225        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2226                        ep->com.dev->rdev.lldi.adapter_type,
2227                        ep->com.cm_id->tos);
2228        if (err) {
2229                pr_err("%s - cannot alloc l2e\n", __func__);
2230                goto fail4;
2231        }
2232
2233        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2234                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2235                 ep->l2t->idx);
2236
2237        state_set(&ep->com, CONNECTING);
2238        ep->tos = ep->com.cm_id->tos;
2239
2240        /* send connect request to rnic */
2241        err = send_connect(ep);
2242        if (!err)
2243                goto out;
2244
2245        cxgb4_l2t_release(ep->l2t);
2246fail4:
2247        dst_release(ep->dst);
2248fail3:
2249        xa_erase_irq(&ep->com.dev->atids, ep->atid);
2250fail2a:
2251        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2252fail2:
2253        /*
2254         * remember to send notification to upper layer.
2255         * We are in here so the upper layer is not aware that this is
2256         * re-connect attempt and so, upper layer is still waiting for
2257         * response of 1st connect request.
2258         */
2259        connect_reply_upcall(ep, -ECONNRESET);
2260fail1:
2261        c4iw_put_ep(&ep->com);
2262out:
2263        return err;
2264}
2265
2266static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2267{
2268        struct c4iw_ep *ep;
2269        struct cpl_act_open_rpl *rpl = cplhdr(skb);
2270        unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2271                                      ntohl(rpl->atid_status)));
2272        struct tid_info *t = dev->rdev.lldi.tids;
2273        int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2274        struct sockaddr_in *la;
2275        struct sockaddr_in *ra;
2276        struct sockaddr_in6 *la6;
2277        struct sockaddr_in6 *ra6;
2278        int ret = 0;
2279
2280        ep = lookup_atid(t, atid);
2281        la = (struct sockaddr_in *)&ep->com.local_addr;
2282        ra = (struct sockaddr_in *)&ep->com.remote_addr;
2283        la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2284        ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2285
2286        pr_debug("ep %p atid %u status %u errno %d\n", ep, atid,
2287                 status, status2errno(status));
2288
2289        if (cxgb_is_neg_adv(status)) {
2290                pr_debug("Connection problems for atid %u status %u (%s)\n",
2291                         atid, status, neg_adv_str(status));
2292                ep->stats.connect_neg_adv++;
2293                mutex_lock(&dev->rdev.stats.lock);
2294                dev->rdev.stats.neg_adv++;
2295                mutex_unlock(&dev->rdev.stats.lock);
2296                return 0;
2297        }
2298
2299        set_bit(ACT_OPEN_RPL, &ep->com.history);
2300
2301        /*
2302         * Log interesting failures.
2303         */
2304        switch (status) {
2305        case CPL_ERR_CONN_RESET:
2306        case CPL_ERR_CONN_TIMEDOUT:
2307                break;
2308        case CPL_ERR_TCAM_FULL:
2309                mutex_lock(&dev->rdev.stats.lock);
2310                dev->rdev.stats.tcam_full++;
2311                mutex_unlock(&dev->rdev.stats.lock);
2312                if (ep->com.local_addr.ss_family == AF_INET &&
2313                    dev->rdev.lldi.enable_fw_ofld_conn) {
2314                        ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
2315                                                   ntohl(rpl->atid_status))));
2316                        if (ret)
2317                                goto fail;
2318                        return 0;
2319                }
2320                break;
2321        case CPL_ERR_CONN_EXIST:
2322                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2323                        set_bit(ACT_RETRY_INUSE, &ep->com.history);
2324                        if (ep->com.remote_addr.ss_family == AF_INET6) {
2325                                struct sockaddr_in6 *sin6 =
2326                                                (struct sockaddr_in6 *)
2327                                                &ep->com.local_addr;
2328                                cxgb4_clip_release(
2329                                                ep->com.dev->rdev.lldi.ports[0],
2330                                                (const u32 *)
2331                                                &sin6->sin6_addr.s6_addr, 1);
2332                        }
2333                        xa_erase_irq(&ep->com.dev->atids, atid);
2334                        cxgb4_free_atid(t, atid);
2335                        dst_release(ep->dst);
2336                        cxgb4_l2t_release(ep->l2t);
2337                        c4iw_reconnect(ep);
2338                        return 0;
2339                }
2340                break;
2341        default:
2342                if (ep->com.local_addr.ss_family == AF_INET) {
2343                        pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2344                                atid, status, status2errno(status),
2345                                &la->sin_addr.s_addr, ntohs(la->sin_port),
2346                                &ra->sin_addr.s_addr, ntohs(ra->sin_port));
2347                } else {
2348                        pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2349                                atid, status, status2errno(status),
2350                                la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2351                                ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2352                }
2353                break;
2354        }
2355
2356fail:
2357        connect_reply_upcall(ep, status2errno(status));
2358        state_set(&ep->com, DEAD);
2359
2360        if (ep->com.remote_addr.ss_family == AF_INET6) {
2361                struct sockaddr_in6 *sin6 =
2362                        (struct sockaddr_in6 *)&ep->com.local_addr;
2363                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2364                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2365        }
2366        if (status && act_open_has_tid(status))
2367                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
2368                                 ep->com.local_addr.ss_family);
2369
2370        xa_erase_irq(&ep->com.dev->atids, atid);
2371        cxgb4_free_atid(t, atid);
2372        dst_release(ep->dst);
2373        cxgb4_l2t_release(ep->l2t);
2374        c4iw_put_ep(&ep->com);
2375
2376        return 0;
2377}
2378
2379static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2380{
2381        struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2382        unsigned int stid = GET_TID(rpl);
2383        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2384
2385        if (!ep) {
2386                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2387                goto out;
2388        }
2389        pr_debug("ep %p status %d error %d\n", ep,
2390                 rpl->status, status2errno(rpl->status));
2391        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2392        c4iw_put_ep(&ep->com);
2393out:
2394        return 0;
2395}
2396
2397static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2398{
2399        struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2400        unsigned int stid = GET_TID(rpl);
2401        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2402
2403        if (!ep) {
2404                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2405                goto out;
2406        }
2407        pr_debug("ep %p\n", ep);
2408        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2409        c4iw_put_ep(&ep->com);
2410out:
2411        return 0;
2412}
2413
2414static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2415                     struct cpl_pass_accept_req *req)
2416{
2417        struct cpl_pass_accept_rpl *rpl;
2418        unsigned int mtu_idx;
2419        u64 opt0;
2420        u32 opt2;
2421        u32 wscale;
2422        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2423        int win;
2424        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2425
2426        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2427
2428        skb_get(skb);
2429        rpl = cplhdr(skb);
2430        if (!is_t4(adapter_type)) {
2431                skb_trim(skb, roundup(sizeof(*rpl5), 16));
2432                rpl5 = (void *)rpl;
2433                INIT_TP_WR(rpl5, ep->hwtid);
2434        } else {
2435                skb_trim(skb, sizeof(*rpl));
2436                INIT_TP_WR(rpl, ep->hwtid);
2437        }
2438        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
2439                                                    ep->hwtid));
2440
2441        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2442                      enable_tcp_timestamps && req->tcpopt.tstamp,
2443                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
2444        wscale = cxgb_compute_wscale(rcv_win);
2445
2446        /*
2447         * Specify the largest window that will fit in opt0. The
2448         * remainder will be specified in the rx_data_ack.
2449         */
2450        win = ep->rcv_win >> 10;
2451        if (win > RCV_BUFSIZ_M)
2452                win = RCV_BUFSIZ_M;
2453        opt0 = (nocong ? NO_CONG_F : 0) |
2454               KEEP_ALIVE_F |
2455               DELACK_F |
2456               WND_SCALE_V(wscale) |
2457               MSS_IDX_V(mtu_idx) |
2458               L2T_IDX_V(ep->l2t->idx) |
2459               TX_CHAN_V(ep->tx_chan) |
2460               SMAC_SEL_V(ep->smac_idx) |
2461               DSCP_V(ep->tos >> 2) |
2462               ULP_MODE_V(ULP_MODE_TCPDDP) |
2463               RCV_BUFSIZ_V(win);
2464        opt2 = RX_CHANNEL_V(0) |
2465               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2466
2467        if (enable_tcp_timestamps && req->tcpopt.tstamp)
2468                opt2 |= TSTAMPS_EN_F;
2469        if (enable_tcp_sack && req->tcpopt.sack)
2470                opt2 |= SACK_EN_F;
2471        if (wscale && enable_tcp_window_scaling)
2472                opt2 |= WND_SCALE_EN_F;
2473        if (enable_ecn) {
2474                const struct tcphdr *tcph;
2475                u32 hlen = ntohl(req->hdr_len);
2476
2477                if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2478                        tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2479                                IP_HDR_LEN_G(hlen);
2480                else
2481                        tcph = (const void *)(req + 1) +
2482                                T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2483                if (tcph->ece && tcph->cwr)
2484                        opt2 |= CCTRL_ECN_V(1);
2485        }
2486        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
2487                u32 isn = (prandom_u32() & ~7UL) - 1;
2488                opt2 |= T5_OPT_2_VALID_F;
2489                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2490                opt2 |= T5_ISS_F;
2491                rpl5 = (void *)rpl;
2492                memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
2493                if (peer2peer)
2494                        isn += 4;
2495                rpl5->iss = cpu_to_be32(isn);
2496                pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
2497        }
2498
2499        rpl->opt0 = cpu_to_be64(opt0);
2500        rpl->opt2 = cpu_to_be32(opt2);
2501        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2502        t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
2503
2504        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2505}
2506
2507static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2508{
2509        pr_debug("c4iw_dev %p tid %u\n", dev, hwtid);
2510        skb_trim(skb, sizeof(struct cpl_tid_release));
2511        release_tid(&dev->rdev, hwtid, skb);
2512        return;
2513}
2514
2515static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2516{
2517        struct c4iw_ep *child_ep = NULL, *parent_ep;
2518        struct cpl_pass_accept_req *req = cplhdr(skb);
2519        unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2520        struct tid_info *t = dev->rdev.lldi.tids;
2521        unsigned int hwtid = GET_TID(req);
2522        struct dst_entry *dst;
2523        __u8 local_ip[16], peer_ip[16];
2524        __be16 local_port, peer_port;
2525        struct sockaddr_in6 *sin6;
2526        int err;
2527        u16 peer_mss = ntohs(req->tcpopt.mss);
2528        int iptype;
2529        unsigned short hdrs;
2530        u8 tos;
2531
2532        parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
2533        if (!parent_ep) {
2534                pr_err("%s connect request on invalid stid %d\n",
2535                       __func__, stid);
2536                goto reject;
2537        }
2538
2539        if (state_read(&parent_ep->com) != LISTEN) {
2540                pr_err("%s - listening ep not in LISTEN\n", __func__);
2541                goto reject;
2542        }
2543
2544        if (parent_ep->com.cm_id->tos_set)
2545                tos = parent_ep->com.cm_id->tos;
2546        else
2547                tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2548
2549        cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
2550                        &iptype, local_ip, peer_ip, &local_port, &peer_port);
2551
2552        /* Find output route */
2553        if (iptype == 4)  {
2554                pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
2555                         , parent_ep, hwtid,
2556                         local_ip, peer_ip, ntohs(local_port),
2557                         ntohs(peer_port), peer_mss);
2558                dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
2559                                      *(__be32 *)local_ip, *(__be32 *)peer_ip,
2560                                      local_port, peer_port, tos);
2561        } else {
2562                pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
2563                         , parent_ep, hwtid,
2564                         local_ip, peer_ip, ntohs(local_port),
2565                         ntohs(peer_port), peer_mss);
2566                dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
2567                                local_ip, peer_ip, local_port, peer_port,
2568                                tos,
2569                                ((struct sockaddr_in6 *)
2570                                 &parent_ep->com.local_addr)->sin6_scope_id);
2571        }
2572        if (!dst) {
2573                pr_err("%s - failed to find dst entry!\n", __func__);
2574                goto reject;
2575        }
2576
2577        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2578        if (!child_ep) {
2579                pr_err("%s - failed to allocate ep entry!\n", __func__);
2580                dst_release(dst);
2581                goto reject;
2582        }
2583
2584        err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2585                        parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2586        if (err) {
2587                pr_err("%s - failed to allocate l2t entry!\n", __func__);
2588                dst_release(dst);
2589                kfree(child_ep);
2590                goto reject;
2591        }
2592
2593        hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
2594               sizeof(struct tcphdr) +
2595               ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2596        if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2597                child_ep->mtu = peer_mss + hdrs;
2598
2599        skb_queue_head_init(&child_ep->com.ep_skb_list);
2600        if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
2601                goto fail;
2602
2603        state_set(&child_ep->com, CONNECTING);
2604        child_ep->com.dev = dev;
2605        child_ep->com.cm_id = NULL;
2606
2607        if (iptype == 4) {
2608                struct sockaddr_in *sin = (struct sockaddr_in *)
2609                        &child_ep->com.local_addr;
2610
2611                sin->sin_family = AF_INET;
2612                sin->sin_port = local_port;
2613                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2614
2615                sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2616                sin->sin_family = AF_INET;
2617                sin->sin_port = ((struct sockaddr_in *)
2618                                 &parent_ep->com.local_addr)->sin_port;
2619                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2620
2621                sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2622                sin->sin_family = AF_INET;
2623                sin->sin_port = peer_port;
2624                sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2625        } else {
2626                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2627                sin6->sin6_family = PF_INET6;
2628                sin6->sin6_port = local_port;
2629                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2630
2631                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2632                sin6->sin6_family = PF_INET6;
2633                sin6->sin6_port = ((struct sockaddr_in6 *)
2634                                   &parent_ep->com.local_addr)->sin6_port;
2635                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2636
2637                sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2638                sin6->sin6_family = PF_INET6;
2639                sin6->sin6_port = peer_port;
2640                memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2641        }
2642
2643        c4iw_get_ep(&parent_ep->com);
2644        child_ep->parent_ep = parent_ep;
2645        child_ep->tos = tos;
2646        child_ep->dst = dst;
2647        child_ep->hwtid = hwtid;
2648
2649        pr_debug("tx_chan %u smac_idx %u rss_qid %u\n",
2650                 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2651
2652        timer_setup(&child_ep->timer, ep_timeout, 0);
2653        cxgb4_insert_tid(t, child_ep, hwtid,
2654                         child_ep->com.local_addr.ss_family);
2655        insert_ep_tid(child_ep);
2656        if (accept_cr(child_ep, skb, req)) {
2657                c4iw_put_ep(&parent_ep->com);
2658                release_ep_resources(child_ep);
2659        } else {
2660                set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2661        }
2662        if (iptype == 6) {
2663                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2664                cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2665                               (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2666        }
2667        goto out;
2668fail:
2669        c4iw_put_ep(&child_ep->com);
2670reject:
2671        reject_cr(dev, hwtid, skb);
2672out:
2673        if (parent_ep)
2674                c4iw_put_ep(&parent_ep->com);
2675        return 0;
2676}
2677
2678static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2679{
2680        struct c4iw_ep *ep;
2681        struct cpl_pass_establish *req = cplhdr(skb);
2682        unsigned int tid = GET_TID(req);
2683        int ret;
2684        u16 tcp_opt = ntohs(req->tcp_opt);
2685
2686        ep = get_ep_from_tid(dev, tid);
2687        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2688        ep->snd_seq = be32_to_cpu(req->snd_isn);
2689        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2690        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
2691
2692        pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
2693
2694        set_emss(ep, tcp_opt);
2695
2696        dst_confirm(ep->dst);
2697        mutex_lock(&ep->com.mutex);
2698        ep->com.state = MPA_REQ_WAIT;
2699        start_ep_timer(ep);
2700        set_bit(PASS_ESTAB, &ep->com.history);
2701        ret = send_flowc(ep);
2702        mutex_unlock(&ep->com.mutex);
2703        if (ret)
2704                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2705        c4iw_put_ep(&ep->com);
2706
2707        return 0;
2708}
2709
2710static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2711{
2712        struct cpl_peer_close *hdr = cplhdr(skb);
2713        struct c4iw_ep *ep;
2714        struct c4iw_qp_attributes attrs;
2715        int disconnect = 1;
2716        int release = 0;
2717        unsigned int tid = GET_TID(hdr);
2718        int ret;
2719
2720        ep = get_ep_from_tid(dev, tid);
2721        if (!ep)
2722                return 0;
2723
2724        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2725        dst_confirm(ep->dst);
2726
2727        set_bit(PEER_CLOSE, &ep->com.history);
2728        mutex_lock(&ep->com.mutex);
2729        switch (ep->com.state) {
2730        case MPA_REQ_WAIT:
2731                __state_set(&ep->com, CLOSING);
2732                break;
2733        case MPA_REQ_SENT:
2734                __state_set(&ep->com, CLOSING);
2735                connect_reply_upcall(ep, -ECONNRESET);
2736                break;
2737        case MPA_REQ_RCVD:
2738
2739                /*
2740                 * We're gonna mark this puppy DEAD, but keep
2741                 * the reference on it until the ULP accepts or
2742                 * rejects the CR. Also wake up anyone waiting
2743                 * in rdma connection migration (see c4iw_accept_cr()).
2744                 */
2745                __state_set(&ep->com, CLOSING);
2746                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2747                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2748                break;
2749        case MPA_REP_SENT:
2750                __state_set(&ep->com, CLOSING);
2751                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2752                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2753                break;
2754        case FPDU_MODE:
2755                start_ep_timer(ep);
2756                __state_set(&ep->com, CLOSING);
2757                attrs.next_state = C4IW_QP_STATE_CLOSING;
2758                ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2759                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2760                if (ret != -ECONNRESET) {
2761                        peer_close_upcall(ep);
2762                        disconnect = 1;
2763                }
2764                break;
2765        case ABORTING:
2766                disconnect = 0;
2767                break;
2768        case CLOSING:
2769                __state_set(&ep->com, MORIBUND);
2770                disconnect = 0;
2771                break;
2772        case MORIBUND:
2773                (void)stop_ep_timer(ep);
2774                if (ep->com.cm_id && ep->com.qp) {
2775                        attrs.next_state = C4IW_QP_STATE_IDLE;
2776                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2777                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2778                }
2779                close_complete_upcall(ep, 0);
2780                __state_set(&ep->com, DEAD);
2781                release = 1;
2782                disconnect = 0;
2783                break;
2784        case DEAD:
2785                disconnect = 0;
2786                break;
2787        default:
2788                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2789        }
2790        mutex_unlock(&ep->com.mutex);
2791        if (disconnect)
2792                c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2793        if (release)
2794                release_ep_resources(ep);
2795        c4iw_put_ep(&ep->com);
2796        return 0;
2797}
2798
2799static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
2800{
2801        complete_cached_srq_buffers(ep, ep->srqe_idx);
2802        if (ep->com.cm_id && ep->com.qp) {
2803                struct c4iw_qp_attributes attrs;
2804
2805                attrs.next_state = C4IW_QP_STATE_ERROR;
2806                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2807                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2808        }
2809        peer_abort_upcall(ep);
2810        release_ep_resources(ep);
2811        c4iw_put_ep(&ep->com);
2812}
2813
2814static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2815{
2816        struct cpl_abort_req_rss6 *req = cplhdr(skb);
2817        struct c4iw_ep *ep;
2818        struct sk_buff *rpl_skb;
2819        struct c4iw_qp_attributes attrs;
2820        int ret;
2821        int release = 0;
2822        unsigned int tid = GET_TID(req);
2823        u8 status;
2824        u32 srqidx;
2825
2826        u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
2827
2828        ep = get_ep_from_tid(dev, tid);
2829        if (!ep)
2830                return 0;
2831
2832        status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
2833
2834        if (cxgb_is_neg_adv(status)) {
2835                pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
2836                         ep->hwtid, status, neg_adv_str(status));
2837                ep->stats.abort_neg_adv++;
2838                mutex_lock(&dev->rdev.stats.lock);
2839                dev->rdev.stats.neg_adv++;
2840                mutex_unlock(&dev->rdev.stats.lock);
2841                goto deref_ep;
2842        }
2843
2844        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
2845                 ep->com.state);
2846        set_bit(PEER_ABORT, &ep->com.history);
2847
2848        /*
2849         * Wake up any threads in rdma_init() or rdma_fini().
2850         * However, this is not needed if com state is just
2851         * MPA_REQ_SENT
2852         */
2853        if (ep->com.state != MPA_REQ_SENT)
2854                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2855
2856        mutex_lock(&ep->com.mutex);
2857        switch (ep->com.state) {
2858        case CONNECTING:
2859                c4iw_put_ep(&ep->parent_ep->com);
2860                break;
2861        case MPA_REQ_WAIT:
2862                (void)stop_ep_timer(ep);
2863                break;
2864        case MPA_REQ_SENT:
2865                (void)stop_ep_timer(ep);
2866                if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
2867                    (mpa_rev == 2 && ep->tried_with_mpa_v1))
2868                        connect_reply_upcall(ep, -ECONNRESET);
2869                else {
2870                        /*
2871                         * we just don't send notification upwards because we
2872                         * want to retry with mpa_v1 without upper layers even
2873                         * knowing it.
2874                         *
2875                         * do some housekeeping so as to re-initiate the
2876                         * connection
2877                         */
2878                        pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
2879                                __func__, mpa_rev);
2880                        ep->retry_with_mpa_v1 = 1;
2881                }
2882                break;
2883        case MPA_REP_SENT:
2884                break;
2885        case MPA_REQ_RCVD:
2886                break;
2887        case MORIBUND:
2888        case CLOSING:
2889                stop_ep_timer(ep);
2890                /*FALLTHROUGH*/
2891        case FPDU_MODE:
2892                if (ep->com.qp && ep->com.qp->srq) {
2893                        srqidx = ABORT_RSS_SRQIDX_G(
2894                                        be32_to_cpu(req->srqidx_status));
2895                        if (srqidx) {
2896                                complete_cached_srq_buffers(ep,
2897                                                            req->srqidx_status);
2898                        } else {
2899                                /* Hold ep ref until finish_peer_abort() */
2900                                c4iw_get_ep(&ep->com);
2901                                __state_set(&ep->com, ABORTING);
2902                                set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
2903                                read_tcb(ep);
2904                                break;
2905
2906                        }
2907                }
2908
2909                if (ep->com.cm_id && ep->com.qp) {
2910                        attrs.next_state = C4IW_QP_STATE_ERROR;
2911                        ret = c4iw_modify_qp(ep->com.qp->rhp,
2912                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2913                                     &attrs, 1);
2914                        if (ret)
2915                                pr_err("%s - qp <- error failed!\n", __func__);
2916                }
2917                peer_abort_upcall(ep);
2918                break;
2919        case ABORTING:
2920                break;
2921        case DEAD:
2922                pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2923                mutex_unlock(&ep->com.mutex);
2924                goto deref_ep;
2925        default:
2926                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2927                break;
2928        }
2929        dst_confirm(ep->dst);
2930        if (ep->com.state != ABORTING) {
2931                __state_set(&ep->com, DEAD);
2932                /* we don't release if we want to retry with mpa_v1 */
2933                if (!ep->retry_with_mpa_v1)
2934                        release = 1;
2935        }
2936        mutex_unlock(&ep->com.mutex);
2937
2938        rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
2939        if (WARN_ON(!rpl_skb)) {
2940                release = 1;
2941                goto out;
2942        }
2943
2944        cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
2945
2946        c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2947out:
2948        if (release)
2949                release_ep_resources(ep);
2950        else if (ep->retry_with_mpa_v1) {
2951                if (ep->com.remote_addr.ss_family == AF_INET6) {
2952                        struct sockaddr_in6 *sin6 =
2953                                        (struct sockaddr_in6 *)
2954                                        &ep->com.local_addr;
2955                        cxgb4_clip_release(
2956                                        ep->com.dev->rdev.lldi.ports[0],
2957                                        (const u32 *)&sin6->sin6_addr.s6_addr,
2958                                        1);
2959                }
2960                xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
2961                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
2962                                 ep->com.local_addr.ss_family);
2963                dst_release(ep->dst);
2964                cxgb4_l2t_release(ep->l2t);
2965                c4iw_reconnect(ep);
2966        }
2967
2968deref_ep:
2969        c4iw_put_ep(&ep->com);
2970        /* Dereferencing ep, referenced in peer_abort_intr() */
2971        c4iw_put_ep(&ep->com);
2972        return 0;
2973}
2974
2975static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2976{
2977        struct c4iw_ep *ep;
2978        struct c4iw_qp_attributes attrs;
2979        struct cpl_close_con_rpl *rpl = cplhdr(skb);
2980        int release = 0;
2981        unsigned int tid = GET_TID(rpl);
2982
2983        ep = get_ep_from_tid(dev, tid);
2984        if (!ep)
2985                return 0;
2986
2987        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2988
2989        /* The cm_id may be null if we failed to connect */
2990        mutex_lock(&ep->com.mutex);
2991        set_bit(CLOSE_CON_RPL, &ep->com.history);
2992        switch (ep->com.state) {
2993        case CLOSING:
2994                __state_set(&ep->com, MORIBUND);
2995                break;
2996        case MORIBUND:
2997                (void)stop_ep_timer(ep);
2998                if ((ep->com.cm_id) && (ep->com.qp)) {
2999                        attrs.next_state = C4IW_QP_STATE_IDLE;
3000                        c4iw_modify_qp(ep->com.qp->rhp,
3001                                             ep->com.qp,
3002                                             C4IW_QP_ATTR_NEXT_STATE,
3003                                             &attrs, 1);
3004                }
3005                close_complete_upcall(ep, 0);
3006                __state_set(&ep->com, DEAD);
3007                release = 1;
3008                break;
3009        case ABORTING:
3010        case DEAD:
3011                break;
3012        default:
3013                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3014                break;
3015        }
3016        mutex_unlock(&ep->com.mutex);
3017        if (release)
3018                release_ep_resources(ep);
3019        c4iw_put_ep(&ep->com);
3020        return 0;
3021}
3022
3023static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
3024{
3025        struct cpl_rdma_terminate *rpl = cplhdr(skb);
3026        unsigned int tid = GET_TID(rpl);
3027        struct c4iw_ep *ep;
3028        struct c4iw_qp_attributes attrs;
3029
3030        ep = get_ep_from_tid(dev, tid);
3031
3032        if (ep) {
3033                if (ep->com.qp) {
3034                        pr_warn("TERM received tid %u qpid %u\n", tid,
3035                                ep->com.qp->wq.sq.qid);
3036                        attrs.next_state = C4IW_QP_STATE_TERMINATE;
3037                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
3038                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
3039                }
3040
3041                c4iw_put_ep(&ep->com);
3042        } else
3043                pr_warn("TERM received tid %u no ep/qp\n", tid);
3044
3045        return 0;
3046}
3047
3048/*
3049 * Upcall from the adapter indicating data has been transmitted.
3050 * For us its just the single MPA request or reply.  We can now free
3051 * the skb holding the mpa message.
3052 */
3053static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
3054{
3055        struct c4iw_ep *ep;
3056        struct cpl_fw4_ack *hdr = cplhdr(skb);
3057        u8 credits = hdr->credits;
3058        unsigned int tid = GET_TID(hdr);
3059
3060
3061        ep = get_ep_from_tid(dev, tid);
3062        if (!ep)
3063                return 0;
3064        pr_debug("ep %p tid %u credits %u\n",
3065                 ep, ep->hwtid, credits);
3066        if (credits == 0) {
3067                pr_debug("0 credit ack ep %p tid %u state %u\n",
3068                         ep, ep->hwtid, state_read(&ep->com));
3069                goto out;
3070        }
3071
3072        dst_confirm(ep->dst);
3073        if (ep->mpa_skb) {
3074                pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
3075                         ep, ep->hwtid, state_read(&ep->com),
3076                         ep->mpa_attr.initiator ? 1 : 0);
3077                mutex_lock(&ep->com.mutex);
3078                kfree_skb(ep->mpa_skb);
3079                ep->mpa_skb = NULL;
3080                if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3081                        stop_ep_timer(ep);
3082                mutex_unlock(&ep->com.mutex);
3083        }
3084out:
3085        c4iw_put_ep(&ep->com);
3086        return 0;
3087}
3088
3089int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3090{
3091        int abort;
3092        struct c4iw_ep *ep = to_ep(cm_id);
3093
3094        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3095
3096        mutex_lock(&ep->com.mutex);
3097        if (ep->com.state != MPA_REQ_RCVD) {
3098                mutex_unlock(&ep->com.mutex);
3099                c4iw_put_ep(&ep->com);
3100                return -ECONNRESET;
3101        }
3102        set_bit(ULP_REJECT, &ep->com.history);
3103        if (mpa_rev == 0)
3104                abort = 1;
3105        else
3106                abort = send_mpa_reject(ep, pdata, pdata_len);
3107        mutex_unlock(&ep->com.mutex);
3108
3109        stop_ep_timer(ep);
3110        c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
3111        c4iw_put_ep(&ep->com);
3112        return 0;
3113}
3114
3115int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3116{
3117        int err;
3118        struct c4iw_qp_attributes attrs;
3119        enum c4iw_qp_attr_mask mask;
3120        struct c4iw_ep *ep = to_ep(cm_id);
3121        struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
3122        struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
3123        int abort = 0;
3124
3125        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3126
3127        mutex_lock(&ep->com.mutex);
3128        if (ep->com.state != MPA_REQ_RCVD) {
3129                err = -ECONNRESET;
3130                goto err_out;
3131        }
3132
3133        if (!qp) {
3134                err = -EINVAL;
3135                goto err_out;
3136        }
3137
3138        set_bit(ULP_ACCEPT, &ep->com.history);
3139        if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
3140            (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
3141                err = -EINVAL;
3142                goto err_abort;
3143        }
3144
3145        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
3146                if (conn_param->ord > ep->ird) {
3147                        if (RELAXED_IRD_NEGOTIATION) {
3148                                conn_param->ord = ep->ird;
3149                        } else {
3150                                ep->ird = conn_param->ird;
3151                                ep->ord = conn_param->ord;
3152                                send_mpa_reject(ep, conn_param->private_data,
3153                                                conn_param->private_data_len);
3154                                err = -ENOMEM;
3155                                goto err_abort;
3156                        }
3157                }
3158                if (conn_param->ird < ep->ord) {
3159                        if (RELAXED_IRD_NEGOTIATION &&
3160                            ep->ord <= h->rdev.lldi.max_ordird_qp) {
3161                                conn_param->ird = ep->ord;
3162                        } else {
3163                                err = -ENOMEM;
3164                                goto err_abort;
3165                        }
3166                }
3167        }
3168        ep->ird = conn_param->ird;
3169        ep->ord = conn_param->ord;
3170
3171        if (ep->mpa_attr.version == 1) {
3172                if (peer2peer && ep->ird == 0)
3173                        ep->ird = 1;
3174        } else {
3175                if (peer2peer &&
3176                    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
3177                    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
3178                        ep->ird = 1;
3179        }
3180
3181        pr_debug("ird %d ord %d\n", ep->ird, ep->ord);
3182
3183        ep->com.cm_id = cm_id;
3184        ref_cm_id(&ep->com);
3185        ep->com.qp = qp;
3186        ref_qp(ep);
3187
3188        /* bind QP to EP and move to RTS */
3189        attrs.mpa_attr = ep->mpa_attr;
3190        attrs.max_ird = ep->ird;
3191        attrs.max_ord = ep->ord;
3192        attrs.llp_stream_handle = ep;
3193        attrs.next_state = C4IW_QP_STATE_RTS;
3194
3195        /* bind QP and TID with INIT_WR */
3196        mask = C4IW_QP_ATTR_NEXT_STATE |
3197                             C4IW_QP_ATTR_LLP_STREAM_HANDLE |
3198                             C4IW_QP_ATTR_MPA_ATTR |
3199                             C4IW_QP_ATTR_MAX_IRD |
3200                             C4IW_QP_ATTR_MAX_ORD;
3201
3202        err = c4iw_modify_qp(ep->com.qp->rhp,
3203                             ep->com.qp, mask, &attrs, 1);
3204        if (err)
3205                goto err_deref_cm_id;
3206
3207        set_bit(STOP_MPA_TIMER, &ep->com.flags);
3208        err = send_mpa_reply(ep, conn_param->private_data,
3209                             conn_param->private_data_len);
3210        if (err)
3211                goto err_deref_cm_id;
3212
3213        __state_set(&ep->com, FPDU_MODE);
3214        established_upcall(ep);
3215        mutex_unlock(&ep->com.mutex);
3216        c4iw_put_ep(&ep->com);
3217        return 0;
3218err_deref_cm_id:
3219        deref_cm_id(&ep->com);
3220err_abort:
3221        abort = 1;
3222err_out:
3223        mutex_unlock(&ep->com.mutex);
3224        if (abort)
3225                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3226        c4iw_put_ep(&ep->com);
3227        return err;
3228}
3229
3230static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3231{
3232        struct in_device *ind;
3233        int found = 0;
3234        struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3235        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3236        const struct in_ifaddr *ifa;
3237
3238        ind = in_dev_get(dev->rdev.lldi.ports[0]);
3239        if (!ind)
3240                return -EADDRNOTAVAIL;
3241        rcu_read_lock();
3242        in_dev_for_each_ifa_rcu(ifa, ind) {
3243                if (ifa->ifa_flags & IFA_F_SECONDARY)
3244                        continue;
3245                laddr->sin_addr.s_addr = ifa->ifa_address;
3246                raddr->sin_addr.s_addr = ifa->ifa_address;
3247                found = 1;
3248                break;
3249        }
3250        rcu_read_unlock();
3251
3252        in_dev_put(ind);
3253        return found ? 0 : -EADDRNOTAVAIL;
3254}
3255
3256static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
3257                      unsigned char banned_flags)
3258{
3259        struct inet6_dev *idev;
3260        int err = -EADDRNOTAVAIL;
3261
3262        rcu_read_lock();
3263        idev = __in6_dev_get(dev);
3264        if (idev != NULL) {
3265                struct inet6_ifaddr *ifp;
3266
3267                read_lock_bh(&idev->lock);
3268                list_for_each_entry(ifp, &idev->addr_list, if_list) {
3269                        if (ifp->scope == IFA_LINK &&
3270                            !(ifp->flags & banned_flags)) {
3271                                memcpy(addr, &ifp->addr, 16);
3272                                err = 0;
3273                                break;
3274                        }
3275                }
3276                read_unlock_bh(&idev->lock);
3277        }
3278        rcu_read_unlock();
3279        return err;
3280}
3281
3282static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3283{
3284        struct in6_addr uninitialized_var(addr);
3285        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3286        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3287
3288        if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3289                memcpy(la6->sin6_addr.s6_addr, &addr, 16);
3290                memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
3291                return 0;
3292        }
3293        return -EADDRNOTAVAIL;
3294}
3295
3296int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3297{
3298        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3299        struct c4iw_ep *ep;
3300        int err = 0;
3301        struct sockaddr_in *laddr;
3302        struct sockaddr_in *raddr;
3303        struct sockaddr_in6 *laddr6;
3304        struct sockaddr_in6 *raddr6;
3305        __u8 *ra;
3306        int iptype;
3307
3308        if ((conn_param->ord > cur_max_read_depth(dev)) ||
3309            (conn_param->ird > cur_max_read_depth(dev))) {
3310                err = -EINVAL;
3311                goto out;
3312        }
3313        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3314        if (!ep) {
3315                pr_err("%s - cannot alloc ep\n", __func__);
3316                err = -ENOMEM;
3317                goto out;
3318        }
3319
3320        skb_queue_head_init(&ep->com.ep_skb_list);
3321        if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
3322                err = -ENOMEM;
3323                goto fail1;
3324        }
3325
3326        timer_setup(&ep->timer, ep_timeout, 0);
3327        ep->plen = conn_param->private_data_len;
3328        if (ep->plen)
3329                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3330                       conn_param->private_data, ep->plen);
3331        ep->ird = conn_param->ird;
3332        ep->ord = conn_param->ord;
3333
3334        if (peer2peer && ep->ord == 0)
3335                ep->ord = 1;
3336
3337        ep->com.cm_id = cm_id;
3338        ref_cm_id(&ep->com);
3339        cm_id->provider_data = ep;
3340        ep->com.dev = dev;
3341        ep->com.qp = get_qhp(dev, conn_param->qpn);
3342        if (!ep->com.qp) {
3343                pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3344                err = -EINVAL;
3345                goto fail2;
3346        }
3347        ref_qp(ep);
3348        pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn,
3349                 ep->com.qp, cm_id);
3350
3351        /*
3352         * Allocate an active TID to initiate a TCP connection.
3353         */
3354        ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3355        if (ep->atid == -1) {
3356                pr_err("%s - cannot alloc atid\n", __func__);
3357                err = -ENOMEM;
3358                goto fail2;
3359        }
3360        err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
3361        if (err)
3362                goto fail5;
3363
3364        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3365               sizeof(ep->com.local_addr));
3366        memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3367               sizeof(ep->com.remote_addr));
3368
3369        laddr = (struct sockaddr_in *)&ep->com.local_addr;
3370        raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3371        laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3372        raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3373
3374        if (cm_id->m_remote_addr.ss_family == AF_INET) {
3375                iptype = 4;
3376                ra = (__u8 *)&raddr->sin_addr;
3377
3378                /*
3379                 * Handle loopback requests to INADDR_ANY.
3380                 */
3381                if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3382                        err = pick_local_ipaddrs(dev, cm_id);
3383                        if (err)
3384                                goto fail2;
3385                }
3386
3387                /* find a route */
3388                pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3389                         &laddr->sin_addr, ntohs(laddr->sin_port),
3390                         ra, ntohs(raddr->sin_port));
3391                ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
3392                                          laddr->sin_addr.s_addr,
3393                                          raddr->sin_addr.s_addr,
3394                                          laddr->sin_port,
3395                                          raddr->sin_port, cm_id->tos);
3396        } else {
3397                iptype = 6;
3398                ra = (__u8 *)&raddr6->sin6_addr;
3399
3400                /*
3401                 * Handle loopback requests to INADDR_ANY.
3402                 */
3403                if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3404                        err = pick_local_ip6addrs(dev, cm_id);
3405                        if (err)
3406                                goto fail2;
3407                }
3408
3409                /* find a route */
3410                pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3411                         laddr6->sin6_addr.s6_addr,
3412                         ntohs(laddr6->sin6_port),
3413                         raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3414                ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
3415                                           laddr6->sin6_addr.s6_addr,
3416                                           raddr6->sin6_addr.s6_addr,
3417                                           laddr6->sin6_port,
3418                                           raddr6->sin6_port, cm_id->tos,
3419                                           raddr6->sin6_scope_id);
3420        }
3421        if (!ep->dst) {
3422                pr_err("%s - cannot find route\n", __func__);
3423                err = -EHOSTUNREACH;
3424                goto fail3;
3425        }
3426
3427        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3428                        ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3429        if (err) {
3430                pr_err("%s - cannot alloc l2e\n", __func__);
3431                goto fail4;
3432        }
3433
3434        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3435                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3436                 ep->l2t->idx);
3437
3438        state_set(&ep->com, CONNECTING);
3439        ep->tos = cm_id->tos;
3440
3441        /* send connect request to rnic */
3442        err = send_connect(ep);
3443        if (!err)
3444                goto out;
3445
3446        cxgb4_l2t_release(ep->l2t);
3447fail4:
3448        dst_release(ep->dst);
3449fail3:
3450        xa_erase_irq(&ep->com.dev->atids, ep->atid);
3451fail5:
3452        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3453fail2:
3454        skb_queue_purge(&ep->com.ep_skb_list);
3455        deref_cm_id(&ep->com);
3456fail1:
3457        c4iw_put_ep(&ep->com);
3458out:
3459        return err;
3460}
3461
3462static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3463{
3464        int err;
3465        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3466                                    &ep->com.local_addr;
3467
3468        if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3469                err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3470                                     (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3471                if (err)
3472                        return err;
3473        }
3474        c4iw_init_wr_wait(ep->com.wr_waitp);
3475        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3476                                   ep->stid, &sin6->sin6_addr,
3477                                   sin6->sin6_port,
3478                                   ep->com.dev->rdev.lldi.rxq_ids[0]);
3479        if (!err)
3480                err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3481                                          ep->com.wr_waitp,
3482                                          0, 0, __func__);
3483        else if (err > 0)
3484                err = net_xmit_errno(err);
3485        if (err) {
3486                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3487                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3488                pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3489                       err, ep->stid,
3490                       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3491        }
3492        return err;
3493}
3494
3495static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3496{
3497        int err;
3498        struct sockaddr_in *sin = (struct sockaddr_in *)
3499                                  &ep->com.local_addr;
3500
3501        if (dev->rdev.lldi.enable_fw_ofld_conn) {
3502                do {
3503                        err = cxgb4_create_server_filter(
3504                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3505                                sin->sin_addr.s_addr, sin->sin_port, 0,
3506                                ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3507                        if (err == -EBUSY) {
3508                                if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3509                                        err = -EIO;
3510                                        break;
3511                                }
3512                                set_current_state(TASK_UNINTERRUPTIBLE);
3513                                schedule_timeout(usecs_to_jiffies(100));
3514                        }
3515                } while (err == -EBUSY);
3516        } else {
3517                c4iw_init_wr_wait(ep->com.wr_waitp);
3518                err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3519                                ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3520                                0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3521                if (!err)
3522                        err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3523                                                  ep->com.wr_waitp,
3524                                                  0, 0, __func__);
3525                else if (err > 0)
3526                        err = net_xmit_errno(err);
3527        }
3528        if (err)
3529                pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
3530                       , err, ep->stid,
3531                       &sin->sin_addr, ntohs(sin->sin_port));
3532        return err;
3533}
3534
3535int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3536{
3537        int err = 0;
3538        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3539        struct c4iw_listen_ep *ep;
3540
3541        might_sleep();
3542
3543        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3544        if (!ep) {
3545                pr_err("%s - cannot alloc ep\n", __func__);
3546                err = -ENOMEM;
3547                goto fail1;
3548        }
3549        skb_queue_head_init(&ep->com.ep_skb_list);
3550        pr_debug("ep %p\n", ep);
3551        ep->com.cm_id = cm_id;
3552        ref_cm_id(&ep->com);
3553        ep->com.dev = dev;
3554        ep->backlog = backlog;
3555        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3556               sizeof(ep->com.local_addr));
3557
3558        /*
3559         * Allocate a server TID.
3560         */
3561        if (dev->rdev.lldi.enable_fw_ofld_conn &&
3562            ep->com.local_addr.ss_family == AF_INET)
3563                ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3564                                             cm_id->m_local_addr.ss_family, ep);
3565        else
3566                ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3567                                            cm_id->m_local_addr.ss_family, ep);
3568
3569        if (ep->stid == -1) {
3570                pr_err("%s - cannot alloc stid\n", __func__);
3571                err = -ENOMEM;
3572                goto fail2;
3573        }
3574        err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
3575        if (err)
3576                goto fail3;
3577
3578        state_set(&ep->com, LISTEN);
3579        if (ep->com.local_addr.ss_family == AF_INET)
3580                err = create_server4(dev, ep);
3581        else
3582                err = create_server6(dev, ep);
3583        if (!err) {
3584                cm_id->provider_data = ep;
3585                goto out;
3586        }
3587        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3588fail3:
3589        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3590                        ep->com.local_addr.ss_family);
3591fail2:
3592        deref_cm_id(&ep->com);
3593        c4iw_put_ep(&ep->com);
3594fail1:
3595out:
3596        return err;
3597}
3598
3599int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3600{
3601        int err;
3602        struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3603
3604        pr_debug("ep %p\n", ep);
3605
3606        might_sleep();
3607        state_set(&ep->com, DEAD);
3608        if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3609            ep->com.local_addr.ss_family == AF_INET) {
3610                err = cxgb4_remove_server_filter(
3611                        ep->com.dev->rdev.lldi.ports[0], ep->stid,
3612                        ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3613        } else {
3614                struct sockaddr_in6 *sin6;
3615                c4iw_init_wr_wait(ep->com.wr_waitp);
3616                err = cxgb4_remove_server(
3617                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3618                                ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3619                if (err)
3620                        goto done;
3621                err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
3622                                          0, 0, __func__);
3623                sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3624                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3625                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3626        }
3627        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3628        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3629                        ep->com.local_addr.ss_family);
3630done:
3631        deref_cm_id(&ep->com);
3632        c4iw_put_ep(&ep->com);
3633        return err;
3634}
3635
3636int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3637{
3638        int ret = 0;
3639        int close = 0;
3640        int fatal = 0;
3641        struct c4iw_rdev *rdev;
3642
3643        mutex_lock(&ep->com.mutex);
3644
3645        pr_debug("ep %p state %s, abrupt %d\n", ep,
3646                 states[ep->com.state], abrupt);
3647
3648        /*
3649         * Ref the ep here in case we have fatal errors causing the
3650         * ep to be released and freed.
3651         */
3652        c4iw_get_ep(&ep->com);
3653
3654        rdev = &ep->com.dev->rdev;
3655        if (c4iw_fatal_error(rdev)) {
3656                fatal = 1;
3657                close_complete_upcall(ep, -EIO);
3658                ep->com.state = DEAD;
3659        }
3660        switch (ep->com.state) {
3661        case MPA_REQ_WAIT:
3662        case MPA_REQ_SENT:
3663        case MPA_REQ_RCVD:
3664        case MPA_REP_SENT:
3665        case FPDU_MODE:
3666        case CONNECTING:
3667                close = 1;
3668                if (abrupt)
3669                        ep->com.state = ABORTING;
3670                else {
3671                        ep->com.state = CLOSING;
3672
3673                        /*
3674                         * if we close before we see the fw4_ack() then we fix
3675                         * up the timer state since we're reusing it.
3676                         */
3677                        if (ep->mpa_skb &&
3678                            test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
3679                                clear_bit(STOP_MPA_TIMER, &ep->com.flags);
3680                                stop_ep_timer(ep);
3681                        }
3682                        start_ep_timer(ep);
3683                }
3684                set_bit(CLOSE_SENT, &ep->com.flags);
3685                break;
3686        case CLOSING:
3687                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3688                        close = 1;
3689                        if (abrupt) {
3690                                (void)stop_ep_timer(ep);
3691                                ep->com.state = ABORTING;
3692                        } else
3693                                ep->com.state = MORIBUND;
3694                }
3695                break;
3696        case MORIBUND:
3697        case ABORTING:
3698        case DEAD:
3699                pr_debug("ignoring disconnect ep %p state %u\n",
3700                         ep, ep->com.state);
3701                break;
3702        default:
3703                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3704                break;
3705        }
3706
3707        if (close) {
3708                if (abrupt) {
3709                        set_bit(EP_DISC_ABORT, &ep->com.history);
3710                        ret = send_abort(ep);
3711                } else {
3712                        set_bit(EP_DISC_CLOSE, &ep->com.history);
3713                        ret = send_halfclose(ep);
3714                }
3715                if (ret) {
3716                        set_bit(EP_DISC_FAIL, &ep->com.history);
3717                        if (!abrupt) {
3718                                stop_ep_timer(ep);
3719                                close_complete_upcall(ep, -EIO);
3720                        }
3721                        if (ep->com.qp) {
3722                                struct c4iw_qp_attributes attrs;
3723
3724                                attrs.next_state = C4IW_QP_STATE_ERROR;
3725                                ret = c4iw_modify_qp(ep->com.qp->rhp,
3726                                                     ep->com.qp,
3727                                                     C4IW_QP_ATTR_NEXT_STATE,
3728                                                     &attrs, 1);
3729                                if (ret)
3730                                        pr_err("%s - qp <- error failed!\n",
3731                                               __func__);
3732                        }
3733                        fatal = 1;
3734                }
3735        }
3736        mutex_unlock(&ep->com.mutex);
3737        c4iw_put_ep(&ep->com);
3738        if (fatal)
3739                release_ep_resources(ep);
3740        return ret;
3741}
3742
3743static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3744                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3745{
3746        struct c4iw_ep *ep;
3747        int atid = be32_to_cpu(req->tid);
3748
3749        ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3750                                           (__force u32) req->tid);
3751        if (!ep)
3752                return;
3753
3754        switch (req->retval) {
3755        case FW_ENOMEM:
3756                set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3757                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3758                        send_fw_act_open_req(ep, atid);
3759                        return;
3760                }
3761                /* fall through */
3762        case FW_EADDRINUSE:
3763                set_bit(ACT_RETRY_INUSE, &ep->com.history);
3764                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3765                        send_fw_act_open_req(ep, atid);
3766                        return;
3767                }
3768                break;
3769        default:
3770                pr_info("%s unexpected ofld conn wr retval %d\n",
3771                       __func__, req->retval);
3772                break;
3773        }
3774        pr_err("active ofld_connect_wr failure %d atid %d\n",
3775               req->retval, atid);
3776        mutex_lock(&dev->rdev.stats.lock);
3777        dev->rdev.stats.act_ofld_conn_fails++;
3778        mutex_unlock(&dev->rdev.stats.lock);
3779        connect_reply_upcall(ep, status2errno(req->retval));
3780        state_set(&ep->com, DEAD);
3781        if (ep->com.remote_addr.ss_family == AF_INET6) {
3782                struct sockaddr_in6 *sin6 =
3783                        (struct sockaddr_in6 *)&ep->com.local_addr;
3784                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3785                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3786        }
3787        xa_erase_irq(&dev->atids, atid);
3788        cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3789        dst_release(ep->dst);
3790        cxgb4_l2t_release(ep->l2t);
3791        c4iw_put_ep(&ep->com);
3792}
3793
3794static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3795                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3796{
3797        struct sk_buff *rpl_skb;
3798        struct cpl_pass_accept_req *cpl;
3799        int ret;
3800
3801        rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3802        if (req->retval) {
3803                pr_err("%s passive open failure %d\n", __func__, req->retval);
3804                mutex_lock(&dev->rdev.stats.lock);
3805                dev->rdev.stats.pas_ofld_conn_fails++;
3806                mutex_unlock(&dev->rdev.stats.lock);
3807                kfree_skb(rpl_skb);
3808        } else {
3809                cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3810                OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3811                                        (__force u32) htonl(
3812                                        (__force u32) req->tid)));
3813                ret = pass_accept_req(dev, rpl_skb);
3814                if (!ret)
3815                        kfree_skb(rpl_skb);
3816        }
3817        return;
3818}
3819
3820static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
3821{
3822        u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
3823        u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
3824        u64 t;
3825        u32 shift = 32;
3826
3827        t = (thi << shift) | (tlo >> shift);
3828
3829        return t;
3830}
3831
3832static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
3833{
3834        u32 v;
3835        u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
3836
3837        if (word & 0x1)
3838                shift += 32;
3839        v = (t >> shift) & mask;
3840        return v;
3841}
3842
3843static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
3844{
3845        struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
3846        __be64 *tcb = (__be64 *)(rpl + 1);
3847        unsigned int tid = GET_TID(rpl);
3848        struct c4iw_ep *ep;
3849        u64 t_flags_64;
3850        u32 rx_pdu_out;
3851
3852        ep = get_ep_from_tid(dev, tid);
3853        if (!ep)
3854                return 0;
3855        /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
3856         * determine if there's a rx PDU feedback event pending.
3857         *
3858         * If that bit is set, it means we'll need to re-read the TCB's
3859         * rq_start value. The final value is the one present in a TCB
3860         * with the TF_RX_PDU_OUT bit cleared.
3861         */
3862
3863        t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
3864        rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
3865
3866        c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
3867        c4iw_put_ep(&ep->com); /* from read_tcb() */
3868
3869        /* If TF_RX_PDU_OUT bit is set, re-read the TCB */
3870        if (rx_pdu_out) {
3871                if (++ep->rx_pdu_out_cnt >= 2) {
3872                        WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
3873                        goto cleanup;
3874                }
3875                read_tcb(ep);
3876                return 0;
3877        }
3878
3879        ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
3880                        TCB_RQ_START_S);
3881cleanup:
3882        pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
3883
3884        if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
3885                finish_peer_abort(dev, ep);
3886        else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
3887                send_abort_req(ep);
3888        else
3889                WARN_ONCE(1, "unexpected state!");
3890
3891        return 0;
3892}
3893
3894static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3895{
3896        struct cpl_fw6_msg *rpl = cplhdr(skb);
3897        struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3898
3899        switch (rpl->type) {
3900        case FW6_TYPE_CQE:
3901                c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3902                break;
3903        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3904                req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3905                switch (req->t_state) {
3906                case TCP_SYN_SENT:
3907                        active_ofld_conn_reply(dev, skb, req);
3908                        break;
3909                case TCP_SYN_RECV:
3910                        passive_ofld_conn_reply(dev, skb, req);
3911                        break;
3912                default:
3913                        pr_err("%s unexpected ofld conn wr state %d\n",
3914                               __func__, req->t_state);
3915                        break;
3916                }
3917                break;
3918        }
3919        return 0;
3920}
3921
3922static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
3923{
3924        __be32 l2info;
3925        __be16 hdr_len, vlantag, len;
3926        u16 eth_hdr_len;
3927        int tcp_hdr_len, ip_hdr_len;
3928        u8 intf;
3929        struct cpl_rx_pkt *cpl = cplhdr(skb);
3930        struct cpl_pass_accept_req *req;
3931        struct tcp_options_received tmp_opt;
3932        struct c4iw_dev *dev;
3933        enum chip_type type;
3934
3935        dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3936        /* Store values from cpl_rx_pkt in temporary location. */
3937        vlantag = cpl->vlan;
3938        len = cpl->len;
3939        l2info  = cpl->l2info;
3940        hdr_len = cpl->hdr_len;
3941        intf = cpl->iff;
3942
3943        __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3944
3945        /*
3946         * We need to parse the TCP options from SYN packet.
3947         * to generate cpl_pass_accept_req.
3948         */
3949        memset(&tmp_opt, 0, sizeof(tmp_opt));
3950        tcp_clear_options(&tmp_opt);
3951        tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
3952
3953        req = __skb_push(skb, sizeof(*req));
3954        memset(req, 0, sizeof(*req));
3955        req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3956                         SYN_MAC_IDX_V(RX_MACIDX_G(
3957                         be32_to_cpu(l2info))) |
3958                         SYN_XACT_MATCH_F);
3959        type = dev->rdev.lldi.adapter_type;
3960        tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3961        ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3962        req->hdr_len =
3963                cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3964        if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3965                eth_hdr_len = is_t4(type) ?
3966                                RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3967                                RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3968                req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3969                                            IP_HDR_LEN_V(ip_hdr_len) |
3970                                            ETH_HDR_LEN_V(eth_hdr_len));
3971        } else { /* T6 and later */
3972                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3973                req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3974                                            T6_IP_HDR_LEN_V(ip_hdr_len) |
3975                                            T6_ETH_HDR_LEN_V(eth_hdr_len));
3976        }
3977        req->vlan = vlantag;
3978        req->len = len;
3979        req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3980                                    PASS_OPEN_TOS_V(tos));
3981        req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3982        if (tmp_opt.wscale_ok)
3983                req->tcpopt.wsf = tmp_opt.snd_wscale;
3984        req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3985        if (tmp_opt.sack_ok)
3986                req->tcpopt.sack = 1;
3987        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3988        return;
3989}
3990
3991static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
3992                                  __be32 laddr, __be16 lport,
3993                                  __be32 raddr, __be16 rport,
3994                                  u32 rcv_isn, u32 filter, u16 window,
3995                                  u32 rss_qid, u8 port_id)
3996{
3997        struct sk_buff *req_skb;
3998        struct fw_ofld_connection_wr *req;
3999        struct cpl_pass_accept_req *cpl = cplhdr(skb);
4000        int ret;
4001
4002        req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
4003        if (!req_skb)
4004                return;
4005        req = __skb_put_zero(req_skb, sizeof(*req));
4006        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
4007        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
4008        req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
4009        req->le.filter = (__force __be32) filter;
4010        req->le.lport = lport;
4011        req->le.pport = rport;
4012        req->le.u.ipv4.lip = laddr;
4013        req->le.u.ipv4.pip = raddr;
4014        req->tcb.rcv_nxt = htonl(rcv_isn + 1);
4015        req->tcb.rcv_adv = htons(window);
4016        req->tcb.t_state_to_astid =
4017                 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
4018                        FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
4019                        FW_OFLD_CONNECTION_WR_ASTID_V(
4020                        PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));
4021
4022        /*
4023         * We store the qid in opt2 which will be used by the firmware
4024         * to send us the wr response.
4025         */
4026        req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
4027
4028        /*
4029         * We initialize the MSS index in TCB to 0xF.
4030         * So that when driver sends cpl_pass_accept_rpl
4031         * TCB picks up the correct value. If this was 0
4032         * TP will ignore any value > 0 for MSS index.
4033         */
4034        req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
4035        req->cookie = (uintptr_t)skb;
4036
4037        set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
4038        ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
4039        if (ret < 0) {
4040                pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
4041                       ret);
4042                kfree_skb(skb);
4043                kfree_skb(req_skb);
4044        }
4045}
4046
4047/*
4048 * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt
4049 * messages when a filter is being used instead of server to
4050 * redirect a syn packet. When packets hit filter they are redirected
4051 * to the offload queue and driver tries to establish the connection
4052 * using firmware work request.
4053 */
4054static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
4055{
4056        int stid;
4057        unsigned int filter;
4058        struct ethhdr *eh = NULL;
4059        struct vlan_ethhdr *vlan_eh = NULL;
4060        struct iphdr *iph;
4061        struct tcphdr *tcph;
4062        struct rss_header *rss = (void *)skb->data;
4063        struct cpl_rx_pkt *cpl = (void *)skb->data;
4064        struct cpl_pass_accept_req *req = (void *)(rss + 1);
4065        struct l2t_entry *e;
4066        struct dst_entry *dst;
4067        struct c4iw_ep *lep = NULL;
4068        u16 window;
4069        struct port_info *pi;
4070        struct net_device *pdev;
4071        u16 rss_qid, eth_hdr_len;
4072        int step;
4073        struct neighbour *neigh;
4074
4075        /* Drop all non-SYN packets */
4076        if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
4077                goto reject;
4078
4079        /*
4080         * Drop all packets which did not hit the filter.
4081         * Unlikely to happen.
4082         */
4083        if (!(rss->filter_hit && rss->filter_tid))
4084                goto reject;
4085
4086        /*
4087         * Calculate the server tid from filter hit index from cpl_rx_pkt.
4088         */
4089        stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
4090
4091        lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
4092        if (!lep) {
4093                pr_warn("%s connect request on invalid stid %d\n",
4094                        __func__, stid);
4095                goto reject;
4096        }
4097
4098        switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
4099        case CHELSIO_T4:
4100                eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4101                break;
4102        case CHELSIO_T5:
4103                eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4104                break;
4105        case CHELSIO_T6:
4106                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4107                break;
4108        default:
4109                pr_err("T%d Chip is not supported\n",
4110                       CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
4111                goto reject;
4112        }
4113
4114        if (eth_hdr_len == ETH_HLEN) {
4115                eh = (struct ethhdr *)(req + 1);
4116                iph = (struct iphdr *)(eh + 1);
4117        } else {
4118                vlan_eh = (struct vlan_ethhdr *)(req + 1);
4119                iph = (struct iphdr *)(vlan_eh + 1);
4120                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
4121        }
4122
4123        if (iph->version != 0x4)
4124                goto reject;
4125
4126        tcph = (struct tcphdr *)(iph + 1);
4127        skb_set_network_header(skb, (void *)iph - (void *)rss);
4128        skb_set_transport_header(skb, (void *)tcph - (void *)rss);
4129        skb_get(skb);
4130
4131        pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n",
4132                 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
4133                 ntohs(tcph->source), iph->tos);
4134
4135        dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
4136                              iph->daddr, iph->saddr, tcph->dest,
4137                              tcph->source, iph->tos);
4138        if (!dst) {
4139                pr_err("%s - failed to find dst entry!\n", __func__);
4140                goto reject;
4141        }
4142        neigh = dst_neigh_lookup_skb(dst, skb);
4143
4144        if (!neigh) {
4145                pr_err("%s - failed to allocate neigh!\n", __func__);
4146                goto free_dst;
4147        }
4148
4149        if (neigh->dev->flags & IFF_LOOPBACK) {
4150                pdev = ip_dev_find(&init_net, iph->daddr);
4151                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4152                                    pdev, 0);
4153                pi = (struct port_info *)netdev_priv(pdev);
4154                dev_put(pdev);
4155        } else {
4156                pdev = get_real_dev(neigh->dev);
4157                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4158                                        pdev, 0);
4159                pi = (struct port_info *)netdev_priv(pdev);
4160        }
4161        neigh_release(neigh);
4162        if (!e) {
4163                pr_err("%s - failed to allocate l2t entry!\n",
4164                       __func__);
4165                goto free_dst;
4166        }
4167
4168        step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
4169        rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
4170        window = (__force u16) htons((__force u16)tcph->window);
4171
4172        /* Calcuate filter portion for LE region. */
4173        filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
4174                                                    dev->rdev.lldi.ports[0],
4175                                                    e));
4176
4177        /*
4178         * Synthesize the cpl_pass_accept_req. We have everything except the
4179         * TID. Once firmware sends a reply with TID we update the TID field
4180         * in cpl and pass it through the regular cpl_pass_accept_req path.
4181         */
4182        build_cpl_pass_accept_req(skb, stid, iph->tos);
4183        send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
4184                              tcph->source, ntohl(tcph->seq), filter, window,
4185                              rss_qid, pi->port_id);
4186        cxgb4_l2t_release(e);
4187free_dst:
4188        dst_release(dst);
4189reject:
4190        if (lep)
4191                c4iw_put_ep(&lep->com);
4192        return 0;
4193}
4194
4195/*
4196 * These are the real handlers that are called from a
4197 * work queue.
4198 */
4199static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
4200        [CPL_ACT_ESTABLISH] = act_establish,
4201        [CPL_ACT_OPEN_RPL] = act_open_rpl,
4202        [CPL_RX_DATA] = rx_data,
4203        [CPL_ABORT_RPL_RSS] = abort_rpl,
4204        [CPL_ABORT_RPL] = abort_rpl,
4205        [CPL_PASS_OPEN_RPL] = pass_open_rpl,
4206        [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
4207        [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
4208        [CPL_PASS_ESTABLISH] = pass_establish,
4209        [CPL_PEER_CLOSE] = peer_close,
4210        [CPL_ABORT_REQ_RSS] = peer_abort,
4211        [CPL_CLOSE_CON_RPL] = close_con_rpl,
4212        [CPL_RDMA_TERMINATE] = terminate,
4213        [CPL_FW4_ACK] = fw4_ack,
4214        [CPL_GET_TCB_RPL] = read_tcb_rpl,
4215        [CPL_FW6_MSG] = deferred_fw6_msg,
4216        [CPL_RX_PKT] = rx_pkt,
4217        [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
4218        [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
4219};
4220
4221static void process_timeout(struct c4iw_ep *ep)
4222{
4223        struct c4iw_qp_attributes attrs;
4224        int abort = 1;
4225
4226        mutex_lock(&ep->com.mutex);
4227        pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
4228        set_bit(TIMEDOUT, &ep->com.history);
4229        switch (ep->com.state) {
4230        case MPA_REQ_SENT:
4231                connect_reply_upcall(ep, -ETIMEDOUT);
4232                break;
4233        case MPA_REQ_WAIT:
4234        case MPA_REQ_RCVD:
4235        case MPA_REP_SENT:
4236        case FPDU_MODE:
4237                break;
4238        case CLOSING:
4239        case MORIBUND:
4240                if (ep->com.cm_id && ep->com.qp) {
4241                        attrs.next_state = C4IW_QP_STATE_ERROR;
4242                        c4iw_modify_qp(ep->com.qp->rhp,
4243                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
4244                                     &attrs, 1);
4245                }
4246                close_complete_upcall(ep, -ETIMEDOUT);
4247                break;
4248        case ABORTING:
4249        case DEAD:
4250
4251                /*
4252                 * These states are expected if the ep timed out at the same
4253                 * time as another thread was calling stop_ep_timer().
4254                 * So we silently do nothing for these states.
4255                 */
4256                abort = 0;
4257                break;
4258        default:
4259                WARN(1, "%s unexpected state ep %p tid %u state %u\n",
4260                        __func__, ep, ep->hwtid, ep->com.state);
4261                abort = 0;
4262        }
4263        mutex_unlock(&ep->com.mutex);
4264        if (abort)
4265                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
4266        c4iw_put_ep(&ep->com);
4267}
4268
4269static void process_timedout_eps(void)
4270{
4271        struct c4iw_ep *ep;
4272
4273        spin_lock_irq(&timeout_lock);
4274        while (!list_empty(&timeout_list)) {
4275                struct list_head *tmp;
4276
4277                tmp = timeout_list.next;
4278                list_del(tmp);
4279                tmp->next = NULL;
4280                tmp->prev = NULL;
4281                spin_unlock_irq(&timeout_lock);
4282                ep = list_entry(tmp, struct c4iw_ep, entry);
4283                process_timeout(ep);
4284                spin_lock_irq(&timeout_lock);
4285        }
4286        spin_unlock_irq(&timeout_lock);
4287}
4288
4289static void process_work(struct work_struct *work)
4290{
4291        struct sk_buff *skb = NULL;
4292        struct c4iw_dev *dev;
4293        struct cpl_act_establish *rpl;
4294        unsigned int opcode;
4295        int ret;
4296
4297        process_timedout_eps();
4298        while ((skb = skb_dequeue(&rxq))) {
4299                rpl = cplhdr(skb);
4300                dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
4301                opcode = rpl->ot.opcode;
4302
4303                if (opcode >= ARRAY_SIZE(work_handlers) ||
4304                    !work_handlers[opcode]) {
4305                        pr_err("No handler for opcode 0x%x.\n", opcode);
4306                        kfree_skb(skb);
4307                } else {
4308                        ret = work_handlers[opcode](dev, skb);
4309                        if (!ret)
4310                                kfree_skb(skb);
4311                }
4312                process_timedout_eps();
4313        }
4314}
4315
4316static DECLARE_WORK(skb_work, process_work);
4317
4318static void ep_timeout(struct timer_list *t)
4319{
4320        struct c4iw_ep *ep = from_timer(ep, t, timer);
4321        int kickit = 0;
4322
4323        spin_lock(&timeout_lock);
4324        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
4325                /*
4326                 * Only insert if it is not already on the list.
4327                 */
4328                if (!ep->entry.next) {
4329                        list_add_tail(&ep->entry, &timeout_list);
4330                        kickit = 1;
4331                }
4332        }
4333        spin_unlock(&timeout_lock);
4334        if (kickit)
4335                queue_work(workq, &skb_work);
4336}
4337
4338/*
4339 * All the CM events are handled on a work queue to have a safe context.
4340 */
4341static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
4342{
4343
4344        /*
4345         * Save dev in the skb->cb area.
4346         */
4347        *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
4348
4349        /*
4350         * Queue the skb and schedule the worker thread.
4351         */
4352        skb_queue_tail(&rxq, skb);
4353        queue_work(workq, &skb_work);
4354        return 0;
4355}
4356
4357static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
4358{
4359        struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
4360
4361        if (rpl->status != CPL_ERR_NONE) {
4362                pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
4363                       rpl->status, GET_TID(rpl));
4364        }
4365        kfree_skb(skb);
4366        return 0;
4367}
4368
4369static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
4370{
4371        struct cpl_fw6_msg *rpl = cplhdr(skb);
4372        struct c4iw_wr_wait *wr_waitp;
4373        int ret;
4374
4375        pr_debug("type %u\n", rpl->type);
4376
4377        switch (rpl->type) {
4378        case FW6_TYPE_WR_RPL:
4379                ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
4380                wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
4381                pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret);
4382                if (wr_waitp)
4383                        c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0);
4384                kfree_skb(skb);
4385                break;
4386        case FW6_TYPE_CQE:
4387        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
4388                sched(dev, skb);
4389                break;
4390        default:
4391                pr_err("%s unexpected fw6 msg type %u\n",
4392                       __func__, rpl->type);
4393                kfree_skb(skb);
4394                break;
4395        }
4396        return 0;
4397}
4398
4399static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4400{
4401        struct cpl_abort_req_rss *req = cplhdr(skb);
4402        struct c4iw_ep *ep;
4403        unsigned int tid = GET_TID(req);
4404
4405        ep = get_ep_from_tid(dev, tid);
4406        /* This EP will be dereferenced in peer_abort() */
4407        if (!ep) {
4408                pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
4409                kfree_skb(skb);
4410                return 0;
4411        }
4412        if (cxgb_is_neg_adv(req->status)) {
4413                pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
4414                         ep->hwtid, req->status,
4415                         neg_adv_str(req->status));
4416                goto out;
4417        }
4418        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
4419
4420        c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
4421out:
4422        sched(dev, skb);
4423        return 0;
4424}
4425
4426/*
4427 * Most upcalls from the T4 Core go to sched() to
4428 * schedule the processing on a work queue.
4429 */
4430c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
4431        [CPL_ACT_ESTABLISH] = sched,
4432        [CPL_ACT_OPEN_RPL] = sched,
4433        [CPL_RX_DATA] = sched,
4434        [CPL_ABORT_RPL_RSS] = sched,
4435        [CPL_ABORT_RPL] = sched,
4436        [CPL_PASS_OPEN_RPL] = sched,
4437        [CPL_CLOSE_LISTSRV_RPL] = sched,
4438        [CPL_PASS_ACCEPT_REQ] = sched,
4439        [CPL_PASS_ESTABLISH] = sched,
4440        [CPL_PEER_CLOSE] = sched,
4441        [CPL_CLOSE_CON_RPL] = sched,
4442        [CPL_ABORT_REQ_RSS] = peer_abort_intr,
4443        [CPL_RDMA_TERMINATE] = sched,
4444        [CPL_FW4_ACK] = sched,
4445        [CPL_SET_TCB_RPL] = set_tcb_rpl,
4446        [CPL_GET_TCB_RPL] = sched,
4447        [CPL_FW6_MSG] = fw6_msg,
4448        [CPL_RX_PKT] = sched
4449};
4450
4451int __init c4iw_cm_init(void)
4452{
4453        spin_lock_init(&timeout_lock);
4454        skb_queue_head_init(&rxq);
4455
4456        workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
4457        if (!workq)
4458                return -ENOMEM;
4459
4460        return 0;
4461}
4462
4463void c4iw_cm_term(void)
4464{
4465        WARN_ON(!list_empty(&timeout_list));
4466        flush_workqueue(workq);
4467        destroy_workqueue(workq);
4468}
4469