linux/drivers/infiniband/hw/cxgb3/iwch_cm.c
/*
 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>

#include <net/neighbour.h>
#include <net/netevent.h>
#include <net/route.h>

#include "tcb.h"
#include "cxgb3_offload.h"
#include "iwch.h"
#include "iwch_provider.h"
#include "iwch_cm.h"

static char *states[] = {
        "idle",
        "listen",
        "connecting",
        "mpa_wait_req",
        "mpa_req_sent",
        "mpa_req_rcvd",
        "mpa_rep_sent",
        "fpdu_mode",
        "aborting",
        "closing",
        "moribund",
        "dead",
        NULL,
};

int peer2peer = 0;
module_param(peer2peer, int, 0644);
MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");

static int ep_timeout_secs = 60;
module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
                                   "in seconds (default=60)");

static int mpa_rev = 1;
module_param(mpa_rev, int, 0644);
MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
                 "1 is spec compliant. (default=1)");

static int markers_enabled = 0;
module_param(markers_enabled, int, 0644);
MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
module_param(crc_enabled, int, 0644);
MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");

static int rcv_win = 256 * 1024;
module_param(rcv_win, int, 0644);
MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
module_param(snd_win, int, 0644);
MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
module_param(nocong, uint, 0644);
MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
module_param(cong_flavor, uint, 0644);
MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
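
/*
 * All of the above knobs are standard module parameters; assuming the
 * driver is built as the iw_cxgb3 module, they can be set at load time,
 * e.g.:
 *
 *      modprobe iw_cxgb3 peer2peer=1 markers_enabled=1
 *
 * or changed afterwards (they are mode 0644) through
 * /sys/module/iw_cxgb3/parameters/.
 */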

static struct workqueue_struct *workq;

static struct sk_buff_head rxq;

static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
static void ep_timeout(struct timer_list *t);
static void connect_reply_upcall(struct iwch_ep *ep, int status);

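/*
 * Arm the endpoint timer.  The timer holds a reference on the ep; a
 * new reference is taken only when no timer was pending, so restarting
 * a pending timer does not leak one.
 */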
static void start_ep_timer(struct iwch_ep *ep)
{
        pr_debug("%s ep %p\n", __func__, ep);
        if (timer_pending(&ep->timer)) {
                pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
                del_timer_sync(&ep->timer);
        } else
                get_ep(&ep->com);
        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
        add_timer(&ep->timer);
}

static void stop_ep_timer(struct iwch_ep *ep)
{
        pr_debug("%s ep %p\n", __func__, ep);
        if (!timer_pending(&ep->timer)) {
                WARN(1, "%s timer stopped when it's not running!  ep %p state %u\n",
                        __func__, ep, ep->com.state);
                return;
        }
        del_timer_sync(&ep->timer);
        put_ep(&ep->com);
}

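/*
 * Thin send wrappers.  Both consume the skb on any failure (including
 * a dead rdev), so callers never free it themselves, and both collapse
 * non-negative lower-layer return codes to 0.
 */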
static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
{
        int     error = 0;
        struct cxio_rdev *rdev;

        rdev = (struct cxio_rdev *)tdev->ulp;
        if (cxio_fatal_error(rdev)) {
                kfree_skb(skb);
                return -EIO;
        }
        error = l2t_send(tdev, skb, l2e);
        if (error < 0)
                kfree_skb(skb);
        return error < 0 ? error : 0;
}

int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
{
        int     error = 0;
        struct cxio_rdev *rdev;

        rdev = (struct cxio_rdev *)tdev->ulp;
        if (cxio_fatal_error(rdev)) {
                kfree_skb(skb);
                return -EIO;
        }
        error = cxgb3_ofld_send(tdev, skb);
        if (error < 0)
                kfree_skb(skb);
        return error < 0 ? error : 0;
}

static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
{
        struct cpl_tid_release *req;

        skb = get_skb(skb, sizeof(*req), GFP_KERNEL);
        if (!skb)
                return;
        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
        skb->priority = CPL_PRIORITY_SETUP;
        iwch_cxgb3_ofld_send(tdev, skb);
}

int iwch_quiesce_tid(struct iwch_ep *ep)
{
        struct cpl_set_tcb_field *req;
        struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);

        if (!skb)
                return -ENOMEM;
        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
        req->reply = 0;
        req->cpu_idx = 0;
        req->word = htons(W_TCB_RX_QUIESCE);
        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
        req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);

        skb->priority = CPL_PRIORITY_DATA;
        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}

int iwch_resume_tid(struct iwch_ep *ep)
{
        struct cpl_set_tcb_field *req;
        struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);

        if (!skb)
                return -ENOMEM;
        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
        req->reply = 0;
        req->cpu_idx = 0;
        req->word = htons(W_TCB_RX_QUIESCE);
        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
        req->val = 0;

        skb->priority = CPL_PRIORITY_DATA;
        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}

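/*
 * Compute the effective MSS for the connection: the MTU-table entry
 * chosen by the peer's MSS index less 40 bytes of IPv4/TCP header,
 * less another 12 if TCP timestamps were negotiated, floored at 128.
 */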
static void set_emss(struct iwch_ep *ep, u16 opt)
{
        pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
        ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
        if (G_TCPOPT_TSTAMP(opt))
                ep->emss -= 12;
        if (ep->emss < 128)
                ep->emss = 128;
        pr_debug("emss=%d\n", ep->emss);
}

static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
{
        unsigned long flags;
        enum iwch_ep_state state;

        spin_lock_irqsave(&epc->lock, flags);
        state = epc->state;
        spin_unlock_irqrestore(&epc->lock, flags);
        return state;
}

static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
        epc->state = new;
}

static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
        unsigned long flags;

        spin_lock_irqsave(&epc->lock, flags);
        pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
        __state_set(epc, new);
        spin_unlock_irqrestore(&epc->lock, flags);
}

static void *alloc_ep(int size, gfp_t gfp)
{
        struct iwch_ep_common *epc;

        epc = kzalloc(size, gfp);
        if (epc) {
                kref_init(&epc->kref);
                spin_lock_init(&epc->lock);
                init_waitqueue_head(&epc->waitq);
        }
        pr_debug("%s alloc ep %p\n", __func__, epc);
        return epc;
}

void __free_ep(struct kref *kref)
{
        struct iwch_ep *ep;
        ep = container_of(container_of(kref, struct iwch_ep_common, kref),
                          struct iwch_ep, com);
        pr_debug("%s ep %p state %s\n",
                 __func__, ep, states[state_read(&ep->com)]);
        if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
                cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
                dst_release(ep->dst);
                l2t_release(ep->com.tdev, ep->l2t);
        }
        kfree(ep);
}

static void release_ep_resources(struct iwch_ep *ep)
{
        pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
        set_bit(RELEASE_RESOURCES, &ep->com.flags);
        put_ep(&ep->com);
}

static int status2errno(int status)
{
        switch (status) {
        case CPL_ERR_NONE:
                return 0;
        case CPL_ERR_CONN_RESET:
                return -ECONNRESET;
        case CPL_ERR_ARP_MISS:
                return -EHOSTUNREACH;
        case CPL_ERR_CONN_TIMEDOUT:
                return -ETIMEDOUT;
        case CPL_ERR_TCAM_FULL:
                return -ENOMEM;
        case CPL_ERR_CONN_EXIST:
                return -EADDRINUSE;
        default:
                return -EIO;
        }
}

/*
 * Try and reuse skbs already allocated...
 */
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
{
        if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
                skb_trim(skb, 0);
                skb_get(skb);
        } else {
                skb = alloc_skb(len, gfp);
        }
        return skb;
}

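/*
 * Resolve the IPv4 route for this 4-tuple.  Note the ERR_PTR from the
 * routing code is flattened to NULL so callers can use a simple !rt
 * test.
 */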
static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
                                 __be32 peer_ip, __be16 local_port,
                                 __be16 peer_port, u8 tos)
{
        struct rtable *rt;
        struct flowi4 fl4;

        rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
                                   peer_port, local_port, IPPROTO_TCP,
                                   tos, 0);
        if (IS_ERR(rt))
                return NULL;
        return rt;
}

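/*
 * Pick the index of the largest entry in the adapter's MTU table that
 * still fits within the path MTU.
 */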
static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
{
        int i = 0;

        while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
                ++i;
        return i;
}

static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
{
        pr_debug("%s t3cdev %p\n", __func__, dev);
        kfree_skb(skb);
}

/*
 * Handle an ARP failure for an active open.
 */
static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
        pr_err("ARP failure during connect\n");
        kfree_skb(skb);
}

/*
 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 * and send it along.
 */
static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
        struct cpl_abort_req *req = cplhdr(skb);

        pr_debug("%s t3cdev %p\n", __func__, dev);
        req->cmd = CPL_ABORT_NO_RST;
        iwch_cxgb3_ofld_send(dev, skb);
}

static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
{
        struct cpl_close_con_req *req;
        struct sk_buff *skb;

        pr_debug("%s ep %p\n", __func__, ep);
        skb = get_skb(NULL, sizeof(*req), gfp);
        if (!skb) {
                pr_err("%s - failed to alloc skb\n", __func__);
                return -ENOMEM;
        }
        skb->priority = CPL_PRIORITY_DATA;
        set_arp_failure_handler(skb, arp_failure_discard);
        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}

static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
        struct cpl_abort_req *req;

        pr_debug("%s ep %p\n", __func__, ep);
        skb = get_skb(skb, sizeof(*req), gfp);
        if (!skb) {
                pr_err("%s - failed to alloc skb\n", __func__);
                return -ENOMEM;
        }
        skb->priority = CPL_PRIORITY_DATA;
        set_arp_failure_handler(skb, abort_arp_failure);
        req = skb_put_zero(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
        req->cmd = CPL_ABORT_SEND_RST;
        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}

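/*
 * Build and fire the CPL_ACT_OPEN_REQ that starts an active open.
 * Window scale, MSS index and congestion-control flavor are encoded in
 * opt0/opt2; an ARP failure on this request is handled by
 * act_open_req_arp_failure().
 */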
static int send_connect(struct iwch_ep *ep)
{
        struct cpl_act_open_req *req;
        struct sk_buff *skb;
        u32 opt0h, opt0l, opt2;
        unsigned int mtu_idx;
        int wscale;

        pr_debug("%s ep %p\n", __func__, ep);

        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
        if (!skb) {
                pr_err("%s - failed to alloc skb\n", __func__);
                return -ENOMEM;
        }
        mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
        wscale = compute_wscale(rcv_win);
        opt0h = V_NAGLE(0) |
            V_NO_CONG(nocong) |
            V_KEEP_ALIVE(1) |
            F_TCAM_BYPASS |
            V_WND_SCALE(wscale) |
            V_MSS_IDX(mtu_idx) |
            V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
        opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
        opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
               V_CONG_CONTROL_FLAVOR(cong_flavor);
        skb->priority = CPL_PRIORITY_SETUP;
        set_arp_failure_handler(skb, act_open_req_arp_failure);

        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
        req->local_port = ep->com.local_addr.sin_port;
        req->peer_port = ep->com.remote_addr.sin_port;
        req->local_ip = ep->com.local_addr.sin_addr.s_addr;
        req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
        req->opt0h = htonl(opt0h);
        req->opt0l = htonl(opt0l);
        req->params = 0;
        req->opt2 = htonl(opt2);
        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}

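/*
 * Stream the MPA start request to the peer.  The caller's skb (the
 * CPL_ACT_ESTABLISH) is reused when it has room for the TX_DATA work
 * request plus the MPA message; otherwise a fresh one is allocated.
 */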
static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
{
        int mpalen;
        struct tx_data_wr *req;
        struct mpa_message *mpa;
        int len;

        pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);

        BUG_ON(skb_cloned(skb));

        mpalen = sizeof(*mpa) + ep->plen;
        if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
                kfree_skb(skb);
                skb = alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
                if (!skb) {
                        connect_reply_upcall(ep, -ENOMEM);
                        return;
                }
        }
        skb_trim(skb, 0);
        skb_reserve(skb, sizeof(*req));
        skb_put(skb, mpalen);
        skb->priority = CPL_PRIORITY_DATA;
        mpa = (struct mpa_message *) skb->data;
        memset(mpa, 0, sizeof(*mpa));
        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
        mpa->flags = (crc_enabled ? MPA_CRC : 0) |
                     (markers_enabled ? MPA_MARKERS : 0);
        mpa->private_data_size = htons(ep->plen);
        mpa->revision = mpa_rev;

        if (ep->plen)
                memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

        /*
         * Reference the mpa skb.  This ensures the data area
         * will remain in memory until the hw acks the tx.
         * Function tx_ack() will deref it.
         */
        skb_get(skb);
        set_arp_failure_handler(skb, arp_failure_discard);
        skb_reset_transport_header(skb);
        len = skb->len;
        req = skb_push(skb, sizeof(*req));
        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
        req->len = htonl(len);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
                           V_TX_SNDBUF(snd_win>>15));
        req->flags = htonl(F_TX_INIT);
        req->sndseq = htonl(ep->snd_seq);
        BUG_ON(ep->mpa_skb);
        ep->mpa_skb = skb;
        iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
        start_ep_timer(ep);
        state_set(&ep->com, MPA_REQ_SENT);
}

static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
        int mpalen;
        struct tx_data_wr *req;
        struct mpa_message *mpa;
        struct sk_buff *skb;

        pr_debug("%s ep %p plen %d\n", __func__, ep, plen);

        mpalen = sizeof(*mpa) + plen;

        skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
        if (!skb) {
                pr_err("%s - cannot alloc skb!\n", __func__);
                return -ENOMEM;
        }
        skb_reserve(skb, sizeof(*req));
        mpa = skb_put(skb, mpalen);
        memset(mpa, 0, sizeof(*mpa));
        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
        mpa->flags = MPA_REJECT;
        mpa->revision = mpa_rev;
        mpa->private_data_size = htons(plen);
        if (plen)
                memcpy(mpa->private_data, pdata, plen);

        /*
         * Reference the mpa skb again.  This ensures the data area
         * will remain in memory until the hw acks the tx.
         * Function tx_ack() will deref it.
         */
        skb_get(skb);
        skb->priority = CPL_PRIORITY_DATA;
        set_arp_failure_handler(skb, arp_failure_discard);
        skb_reset_transport_header(skb);
        req = skb_push(skb, sizeof(*req));
        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
        req->len = htonl(mpalen);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
                           V_TX_SNDBUF(snd_win>>15));
        req->flags = htonl(F_TX_INIT);
        req->sndseq = htonl(ep->snd_seq);
        BUG_ON(ep->mpa_skb);
        ep->mpa_skb = skb;
        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}

static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
        int mpalen;
        struct tx_data_wr *req;
        struct mpa_message *mpa;
        int len;
        struct sk_buff *skb;

        pr_debug("%s ep %p plen %d\n", __func__, ep, plen);

        mpalen = sizeof(*mpa) + plen;

        skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
        if (!skb) {
                pr_err("%s - cannot alloc skb!\n", __func__);
                return -ENOMEM;
        }
        skb->priority = CPL_PRIORITY_DATA;
        skb_reserve(skb, sizeof(*req));
        mpa = skb_put(skb, mpalen);
        memset(mpa, 0, sizeof(*mpa));
        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
        mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
                     (markers_enabled ? MPA_MARKERS : 0);
        mpa->revision = mpa_rev;
        mpa->private_data_size = htons(plen);
        if (plen)
                memcpy(mpa->private_data, pdata, plen);

        /*
         * Reference the mpa skb.  This ensures the data area
         * will remain in memory until the hw acks the tx.
         * Function tx_ack() will deref it.
         */
        skb_get(skb);
        set_arp_failure_handler(skb, arp_failure_discard);
        skb_reset_transport_header(skb);
        len = skb->len;
        req = skb_push(skb, sizeof(*req));
        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
        req->len = htonl(len);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
                           V_TX_SNDBUF(snd_win>>15));
        req->flags = htonl(F_TX_INIT);
        req->sndseq = htonl(ep->snd_seq);
        ep->mpa_skb = skb;
        state_set(&ep->com, MPA_REP_SENT);
        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}

static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_ep *ep = ctx;
        struct cpl_act_establish *req = cplhdr(skb);
        unsigned int tid = GET_TID(req);

        pr_debug("%s ep %p tid %d\n", __func__, ep, tid);

        dst_confirm(ep->dst);

        /* setup the hwtid for this connection */
        ep->hwtid = tid;
        cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);

        ep->snd_seq = ntohl(req->snd_isn);
        ep->rcv_seq = ntohl(req->rcv_isn);

        set_emss(ep, ntohs(req->tcp_opt));

        /* dealloc the atid */
        cxgb3_free_atid(ep->com.tdev, ep->atid);

        /* start MPA negotiation */
        send_mpa_req(ep, skb);

        return 0;
}

static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
        pr_debug("%s ep %p\n", __func__, ep);
        state_set(&ep->com, ABORTING);
        send_abort(ep, skb, gfp);
}

static void close_complete_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        pr_debug("%s ep %p\n", __func__, ep);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CLOSE;
        if (ep->com.cm_id) {
                pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
                         ep, ep->com.cm_id, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
                ep->com.cm_id->rem_ref(ep->com.cm_id);
                ep->com.cm_id = NULL;
                ep->com.qp = NULL;
        }
}

static void peer_close_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        pr_debug("%s ep %p\n", __func__, ep);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_DISCONNECT;
        if (ep->com.cm_id) {
                pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
                         ep, ep->com.cm_id, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
        }
}

static void peer_abort_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        pr_debug("%s ep %p\n", __func__, ep);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CLOSE;
        event.status = -ECONNRESET;
        if (ep->com.cm_id) {
                pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
                         ep->com.cm_id, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
                ep->com.cm_id->rem_ref(ep->com.cm_id);
                ep->com.cm_id = NULL;
                ep->com.qp = NULL;
        }
}

static void connect_reply_upcall(struct iwch_ep *ep, int status)
{
        struct iw_cm_event event;

        pr_debug("%s ep %p status %d\n", __func__, ep, status);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REPLY;
        event.status = status;
        memcpy(&event.local_addr, &ep->com.local_addr,
               sizeof(ep->com.local_addr));
        memcpy(&event.remote_addr, &ep->com.remote_addr,
               sizeof(ep->com.remote_addr));

        if ((status == 0) || (status == -ECONNREFUSED)) {
                event.private_data_len = ep->plen;
                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        }
        if (ep->com.cm_id) {
                pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
                         ep->hwtid, status);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
        }
        if (status < 0) {
                ep->com.cm_id->rem_ref(ep->com.cm_id);
                ep->com.cm_id = NULL;
                ep->com.qp = NULL;
        }
}

static void connect_request_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REQUEST;
        memcpy(&event.local_addr, &ep->com.local_addr,
               sizeof(ep->com.local_addr));
        memcpy(&event.remote_addr, &ep->com.remote_addr,
               sizeof(ep->com.remote_addr));
        event.private_data_len = ep->plen;
        event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        event.provider_data = ep;
        /*
         * Until ird/ord negotiation via MPAv2 support is added, send max
         * supported values
         */
        event.ird = event.ord = 8;
        if (state_read(&ep->parent_ep->com) != DEAD) {
                get_ep(&ep->com);
                ep->parent_ep->com.cm_id->event_handler(
                                                ep->parent_ep->com.cm_id,
                                                &event);
        }
        put_ep(&ep->parent_ep->com);
        ep->parent_ep = NULL;
}

static void established_upcall(struct iwch_ep *ep)
{
        struct iw_cm_event event;

        pr_debug("%s ep %p\n", __func__, ep);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_ESTABLISHED;
        /*
         * Until ird/ord negotiation via MPAv2 support is added, send max
         * supported values
         */
        event.ird = event.ord = 8;
        if (ep->com.cm_id) {
                pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
        }
}

static int update_rx_credits(struct iwch_ep *ep, u32 credits)
{
        struct cpl_rx_data_ack *req;
        struct sk_buff *skb;

        pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
        if (!skb) {
                pr_err("update_rx_credits - cannot alloc skb!\n");
                return 0;
        }

        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
        req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
        skb->priority = CPL_PRIORITY_ACK;
        iwch_cxgb3_ofld_send(ep->com.tdev, skb);
        return credits;
}

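/*
 * Streaming-mode RX while in MPA_REQ_SENT: accumulate the (possibly
 * segmented) MPA start reply into ep->mpa_pkt, validate it, and on
 * success move the QP to RTS and enter FPDU mode.  Protocol errors
 * abort the connection.
 */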
static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
{
        struct mpa_message *mpa;
        u16 plen;
        struct iwch_qp_attributes attrs;
        enum iwch_qp_attr_mask mask;
        int err;

        pr_debug("%s ep %p\n", __func__, ep);

        /*
         * Stop mpa timer.  If it expired, then the state has
         * changed and we bail since ep_timeout already aborted
         * the connection.
         */
        stop_ep_timer(ep);
        if (state_read(&ep->com) != MPA_REQ_SENT)
                return;

        /*
         * If we get more than the supported amount of private data
         * then we must fail this connection.
         */
        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
                err = -EINVAL;
                goto err;
        }

        /*
         * Copy the new data into our accumulation buffer.
         */
        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
                                  skb->len);
        ep->mpa_pkt_len += skb->len;

        /*
         * If we don't even have the mpa message, then bail.
         */
        if (ep->mpa_pkt_len < sizeof(*mpa))
                return;
        mpa = (struct mpa_message *) ep->mpa_pkt;

        /* Validate MPA header. */
        if (mpa->revision != mpa_rev) {
                err = -EPROTO;
                goto err;
        }
        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
                err = -EPROTO;
                goto err;
        }

        plen = ntohs(mpa->private_data_size);

        /*
         * Fail if there's too much private data.
         */
        if (plen > MPA_MAX_PRIVATE_DATA) {
                err = -EPROTO;
                goto err;
        }

        /*
         * If plen does not account for pkt size
         */
        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
                err = -EPROTO;
                goto err;
        }

        ep->plen = (u8) plen;

        /*
         * If we don't have all the pdata yet, then bail.
         * We'll continue processing when more data arrives.
         */
        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
                return;

        if (mpa->flags & MPA_REJECT) {
                err = -ECONNREFUSED;
                goto err;
        }

        /*
         * If we get here we have accumulated the entire mpa
         * start reply message including private data. And
         * the MPA header is valid.
         */
        state_set(&ep->com, FPDU_MODE);
        ep->mpa_attr.initiator = 1;
        ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
        ep->mpa_attr.version = mpa_rev;
        pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
                 __func__,
                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

        attrs.mpa_attr = ep->mpa_attr;
        attrs.max_ird = ep->ird;
        attrs.max_ord = ep->ord;
        attrs.llp_stream_handle = ep;
        attrs.next_state = IWCH_QP_STATE_RTS;

        mask = IWCH_QP_ATTR_NEXT_STATE |
            IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
            IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

        /* bind QP and TID with INIT_WR */
        err = iwch_modify_qp(ep->com.qp->rhp,
                             ep->com.qp, mask, &attrs, 1);
        if (err)
                goto err;

        if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0)
                iwch_post_zb_read(ep);

        goto out;
err:
        abort_connection(ep, skb, GFP_KERNEL);
out:
        connect_reply_upcall(ep, err);
}

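/*
 * Passive-side counterpart of process_mpa_reply(): accumulate and
 * validate the peer's MPA start request, then deliver a connect
 * request upcall on the listening endpoint.
 */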
static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
{
        struct mpa_message *mpa;
        u16 plen;

        pr_debug("%s ep %p\n", __func__, ep);

        /*
         * Stop mpa timer.  If it expired, then the state has
         * changed and we bail since ep_timeout already aborted
         * the connection.
         */
        stop_ep_timer(ep);
        if (state_read(&ep->com) != MPA_REQ_WAIT)
                return;

        /*
         * If we get more than the supported amount of private data
         * then we must fail this connection.
         */
        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
                abort_connection(ep, skb, GFP_KERNEL);
                return;
        }

        pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);

        /*
         * Copy the new data into our accumulation buffer.
         */
        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
                                  skb->len);
        ep->mpa_pkt_len += skb->len;

        /*
         * If we don't even have the mpa message, then bail.
         * We'll continue processing when more data arrives.
         */
        if (ep->mpa_pkt_len < sizeof(*mpa))
                return;
        pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
        mpa = (struct mpa_message *) ep->mpa_pkt;

        /*
         * Validate MPA Header.
         */
        if (mpa->revision != mpa_rev) {
                abort_connection(ep, skb, GFP_KERNEL);
                return;
        }

        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
                abort_connection(ep, skb, GFP_KERNEL);
                return;
        }

        plen = ntohs(mpa->private_data_size);

        /*
         * Fail if there's too much private data.
         */
        if (plen > MPA_MAX_PRIVATE_DATA) {
                abort_connection(ep, skb, GFP_KERNEL);
                return;
        }

        /*
         * If plen does not account for pkt size
         */
        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
                abort_connection(ep, skb, GFP_KERNEL);
                return;
        }
        ep->plen = (u8) plen;

        /*
         * If we don't have all the pdata yet, then bail.
         */
        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
                return;

        /*
         * If we get here we have accumulated the entire mpa
         * start request message including private data.
         */
        ep->mpa_attr.initiator = 0;
        ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
        ep->mpa_attr.version = mpa_rev;
        pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
                 __func__,
                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

        state_set(&ep->com, MPA_REQ_RCVD);

        /* drive upcall */
        connect_request_upcall(ep);
}

static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_ep *ep = ctx;
        struct cpl_rx_data *hdr = cplhdr(skb);
        unsigned int dlen = ntohs(hdr->len);

        pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);

        skb_pull(skb, sizeof(*hdr));
        skb_trim(skb, dlen);

        ep->rcv_seq += dlen;
        BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));

        switch (state_read(&ep->com)) {
        case MPA_REQ_SENT:
                process_mpa_reply(ep, skb);
                break;
        case MPA_REQ_WAIT:
                process_mpa_request(ep, skb);
                break;
        case MPA_REP_SENT:
                break;
        default:
                pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
                       __func__, ep, state_read(&ep->com), ep->hwtid);

                /*
                 * The ep will timeout and inform the ULP of the failure.
                 * See ep_timeout().
                 */
                break;
        }

        /* update RX credits */
        update_rx_credits(ep, dlen);

        return CPL_RET_BUF_DONE;
}

/*
 * Upcall from the adapter indicating data has been transmitted.
 * For us it's just the single MPA request or reply.  We can now free
 * the skb holding the mpa message.
 */
static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_ep *ep = ctx;
        struct cpl_wr_ack *hdr = cplhdr(skb);
        unsigned int credits = ntohs(hdr->credits);
        unsigned long flags;
        int post_zb = 0;

        pr_debug("%s ep %p credits %u\n", __func__, ep, credits);

        if (credits == 0) {
                pr_debug("%s 0 credit ack  ep %p state %u\n",
                         __func__, ep, state_read(&ep->com));
                return CPL_RET_BUF_DONE;
        }

        spin_lock_irqsave(&ep->com.lock, flags);
        BUG_ON(credits != 1);
        dst_confirm(ep->dst);
        if (!ep->mpa_skb) {
                pr_debug("%s rdma_init wr_ack ep %p state %u\n",
                         __func__, ep, ep->com.state);
                if (ep->mpa_attr.initiator) {
                        pr_debug("%s initiator ep %p state %u\n",
                                 __func__, ep, ep->com.state);
                        if (peer2peer && ep->com.state == FPDU_MODE)
                                post_zb = 1;
                } else {
                        pr_debug("%s responder ep %p state %u\n",
                                 __func__, ep, ep->com.state);
                        if (ep->com.state == MPA_REQ_RCVD) {
                                ep->com.rpl_done = 1;
                                wake_up(&ep->com.waitq);
                        }
                }
        } else {
                pr_debug("%s lsm ack ep %p state %u freeing skb\n",
                         __func__, ep, ep->com.state);
                kfree_skb(ep->mpa_skb);
                ep->mpa_skb = NULL;
        }
        spin_unlock_irqrestore(&ep->com.lock, flags);
        if (post_zb)
                iwch_post_zb_read(ep);
        return CPL_RET_BUF_DONE;
}

static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_ep *ep = ctx;
        unsigned long flags;
        int release = 0;

        pr_debug("%s ep %p\n", __func__, ep);
        BUG_ON(!ep);

        /*
         * We get 2 abort replies from the HW.  The first one must
         * be ignored except for scribbling that we need one more.
         */
        if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
                return CPL_RET_BUF_DONE;

        spin_lock_irqsave(&ep->com.lock, flags);
        switch (ep->com.state) {
        case ABORTING:
                close_complete_upcall(ep);
                __state_set(&ep->com, DEAD);
                release = 1;
                break;
        default:
                pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
                break;
        }
        spin_unlock_irqrestore(&ep->com.lock, flags);

        if (release)
                release_ep_resources(ep);
        return CPL_RET_BUF_DONE;
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int act_open_has_tid(int status)
{
        return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
               status != CPL_ERR_ARP_MISS;
}

static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_ep *ep = ctx;
        struct cpl_act_open_rpl *rpl = cplhdr(skb);

        pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
                 status2errno(rpl->status));
        connect_reply_upcall(ep, status2errno(rpl->status));
        state_set(&ep->com, DEAD);
        if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
                release_tid(ep->com.tdev, GET_TID(rpl), NULL);
        cxgb3_free_atid(ep->com.tdev, ep->atid);
        dst_release(ep->dst);
        l2t_release(ep->com.tdev, ep->l2t);
        put_ep(&ep->com);
        return CPL_RET_BUF_DONE;
}

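/*
 * Post a CPL_PASS_OPEN_REQ to start listening on the server TID.  The
 * reply arrives asynchronously in pass_open_rpl(), which records the
 * status and wakes the waiter on ep->com.waitq.
 */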
static int listen_start(struct iwch_listen_ep *ep)
{
        struct sk_buff *skb;
        struct cpl_pass_open_req *req;

        pr_debug("%s ep %p\n", __func__, ep);
        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
        if (!skb) {
                pr_err("t3c_listen_start failed to alloc skb!\n");
                return -ENOMEM;
        }

        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
        req->local_port = ep->com.local_addr.sin_port;
        req->local_ip = ep->com.local_addr.sin_addr.s_addr;
        req->peer_port = 0;
        req->peer_ip = 0;
        req->peer_netmask = 0;
        req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
        req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
        req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));

        skb->priority = 1;
        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}

static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_listen_ep *ep = ctx;
        struct cpl_pass_open_rpl *rpl = cplhdr(skb);

        pr_debug("%s ep %p status %d error %d\n", __func__, ep,
                 rpl->status, status2errno(rpl->status));
        ep->com.rpl_err = status2errno(rpl->status);
        ep->com.rpl_done = 1;
        wake_up(&ep->com.waitq);

        return CPL_RET_BUF_DONE;
}

static int listen_stop(struct iwch_listen_ep *ep)
{
        struct sk_buff *skb;
        struct cpl_close_listserv_req *req;

        pr_debug("%s ep %p\n", __func__, ep);
        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
        if (!skb) {
                pr_err("%s - failed to alloc skb\n", __func__);
                return -ENOMEM;
        }
        req = skb_put(skb, sizeof(*req));
        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        req->cpu_idx = 0;
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
        skb->priority = 1;
        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}

static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
                             void *ctx)
{
        struct iwch_listen_ep *ep = ctx;
        struct cpl_close_listserv_rpl *rpl = cplhdr(skb);

        pr_debug("%s ep %p\n", __func__, ep);
        ep->com.rpl_err = status2errno(rpl->status);
        ep->com.rpl_done = 1;
        wake_up(&ep->com.waitq);
        return CPL_RET_BUF_DONE;
}

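/*
 * Accept an incoming connection request: recycle the request skb into
 * a CPL_PASS_ACCEPT_RPL carrying the same opt0/opt2 tuning used for
 * active opens.
 */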
static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
{
        struct cpl_pass_accept_rpl *rpl;
        unsigned int mtu_idx;
        u32 opt0h, opt0l, opt2;
        int wscale;

        pr_debug("%s ep %p\n", __func__, ep);
        BUG_ON(skb_cloned(skb));
        skb_trim(skb, sizeof(*rpl));
        skb_get(skb);
        mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
        wscale = compute_wscale(rcv_win);
        opt0h = V_NAGLE(0) |
            V_NO_CONG(nocong) |
            V_KEEP_ALIVE(1) |
            F_TCAM_BYPASS |
            V_WND_SCALE(wscale) |
            V_MSS_IDX(mtu_idx) |
            V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
        opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
        opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
               V_CONG_CONTROL_FLAVOR(cong_flavor);

        rpl = cplhdr(skb);
        rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
        rpl->peer_ip = peer_ip;
        rpl->opt0h = htonl(opt0h);
        rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
        rpl->opt2 = htonl(opt2);
        rpl->rsvd = rpl->opt2;  /* workaround for HW bug */
        skb->priority = CPL_PRIORITY_SETUP;
        iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}

static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
                      struct sk_buff *skb)
{
        pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
                 peer_ip);
        BUG_ON(skb_cloned(skb));
        skb_trim(skb, sizeof(struct cpl_tid_release));
        skb_get(skb);

        if (tdev->type != T3A)
                release_tid(tdev, hwtid, skb);
        else {
                struct cpl_pass_accept_rpl *rpl;

                rpl = cplhdr(skb);
                skb->priority = CPL_PRIORITY_SETUP;
                rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
                                                      hwtid));
                rpl->peer_ip = peer_ip;
                rpl->opt0h = htonl(F_TCAM_BYPASS);
                rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
                rpl->opt2 = 0;
                rpl->rsvd = rpl->opt2;
                iwch_cxgb3_ofld_send(tdev, skb);
        }
}

static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_ep *child_ep, *parent_ep = ctx;
        struct cpl_pass_accept_req *req = cplhdr(skb);
        unsigned int hwtid = GET_TID(req);
        struct dst_entry *dst;
        struct l2t_entry *l2t;
        struct rtable *rt;
        struct iff_mac tim;

        pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);

        if (state_read(&parent_ep->com) != LISTEN) {
                pr_err("%s - listening ep not in LISTEN\n", __func__);
                goto reject;
        }

        /*
         * Find the netdev for this connection request.
         */
        tim.mac_addr = req->dst_mac;
        tim.vlan_tag = ntohs(req->vlan_tag);
        if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
                pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
                goto reject;
        }

        /* Find output route */
        rt = find_route(tdev,
                        req->local_ip,
                        req->peer_ip,
                        req->local_port,
                        req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
        if (!rt) {
                pr_err("%s - failed to find dst entry!\n", __func__);
                goto reject;
        }
        dst = &rt->dst;
        l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
        if (!l2t) {
                pr_err("%s - failed to allocate l2t entry!\n", __func__);
                dst_release(dst);
                goto reject;
        }
        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
        if (!child_ep) {
                pr_err("%s - failed to allocate ep entry!\n", __func__);
                l2t_release(tdev, l2t);
                dst_release(dst);
                goto reject;
        }
        state_set(&child_ep->com, CONNECTING);
        child_ep->com.tdev = tdev;
        child_ep->com.cm_id = NULL;
        child_ep->com.local_addr.sin_family = AF_INET;
        child_ep->com.local_addr.sin_port = req->local_port;
        child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
        child_ep->com.remote_addr.sin_family = AF_INET;
        child_ep->com.remote_addr.sin_port = req->peer_port;
        child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
        get_ep(&parent_ep->com);
        child_ep->parent_ep = parent_ep;
        child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
        child_ep->l2t = l2t;
        child_ep->dst = dst;
        child_ep->hwtid = hwtid;
        timer_setup(&child_ep->timer, ep_timeout, 0);
        cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
        accept_cr(child_ep, req->peer_ip, skb);
        goto out;
reject:
        reject_cr(tdev, hwtid, req->peer_ip, skb);
out:
        return CPL_RET_BUF_DONE;
}

static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
        struct iwch_ep *ep = ctx;
        struct cpl_pass_establish *req = cplhdr(skb);

        pr_debug("%s ep %p\n", __func__, ep);
        ep->snd_seq = ntohl(req->snd_isn);
        ep->rcv_seq = ntohl(req->rcv_isn);

        set_emss(ep, ntohs(req->tcp_opt));

        dst_confirm(ep->dst);
        state_set(&ep->com, MPA_REQ_WAIT);
        start_ep_timer(ep);

        return CPL_RET_BUF_DONE;
}

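/*
 * The peer sent a FIN.  Depending on where the endpoint state machine
 * stands this completes an in-progress close (MORIBUND -> DEAD),
 * starts one (FPDU_MODE -> CLOSING), or fails an MPA exchange that was
 * still in flight.
 */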
1428static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1429{
1430        struct iwch_ep *ep = ctx;
1431        struct iwch_qp_attributes attrs;
1432        unsigned long flags;
1433        int disconnect = 1;
1434        int release = 0;
1435
1436        pr_debug("%s ep %p\n", __func__, ep);
1437        dst_confirm(ep->dst);
1438
1439        spin_lock_irqsave(&ep->com.lock, flags);
1440        switch (ep->com.state) {
1441        case MPA_REQ_WAIT:
1442                __state_set(&ep->com, CLOSING);
1443                break;
1444        case MPA_REQ_SENT:
1445                __state_set(&ep->com, CLOSING);
1446                connect_reply_upcall(ep, -ECONNRESET);
1447                break;
1448        case MPA_REQ_RCVD:
1449
1450                /*
1451                 * We're gonna mark this puppy DEAD, but keep
1452                 * the reference on it until the ULP accepts or
1453                 * rejects the CR. Also wake up anyone waiting
1454                 * in rdma connection migration (see iwch_accept_cr()).
1455                 */
1456                __state_set(&ep->com, CLOSING);
1457                ep->com.rpl_done = 1;
1458                ep->com.rpl_err = -ECONNRESET;
1459                pr_debug("waking up ep %p\n", ep);
1460                wake_up(&ep->com.waitq);
1461                break;
1462        case MPA_REP_SENT:
1463                __state_set(&ep->com, CLOSING);
1464                ep->com.rpl_done = 1;
1465                ep->com.rpl_err = -ECONNRESET;
1466                pr_debug("waking up ep %p\n", ep);
1467                wake_up(&ep->com.waitq);
1468                break;
1469        case FPDU_MODE:
1470                start_ep_timer(ep);
1471                __state_set(&ep->com, CLOSING);
1472                attrs.next_state = IWCH_QP_STATE_CLOSING;
1473                iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1474                               IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1475                peer_close_upcall(ep);
1476                break;
1477        case ABORTING:
1478                disconnect = 0;
1479                break;
1480        case CLOSING:
1481                __state_set(&ep->com, MORIBUND);
1482                disconnect = 0;
1483                break;
1484        case MORIBUND:
1485                stop_ep_timer(ep);
1486                if (ep->com.cm_id && ep->com.qp) {
1487                        attrs.next_state = IWCH_QP_STATE_IDLE;
1488                        iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1489                                       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1490                }
1491                close_complete_upcall(ep);
1492                __state_set(&ep->com, DEAD);
1493                release = 1;
1494                disconnect = 0;
1495                break;
1496        case DEAD:
1497                disconnect = 0;
1498                break;
1499        default:
1500                BUG_ON(1);
1501        }
1502        spin_unlock_irqrestore(&ep->com.lock, flags);
1503        if (disconnect)
1504                iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1505        if (release)
1506                release_ep_resources(ep);
1507        return CPL_RET_BUF_DONE;
1508}
1509
1510/*
1511 * Returns whether an ABORT_REQ_RSS message is a negative advice.
1512 */
1513static int is_neg_adv_abort(unsigned int status)
1514{
1515        return status == CPL_ERR_RTX_NEG_ADVICE ||
1516               status == CPL_ERR_PERSIST_NEG_ADVICE;
1517}
1518
1519static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1520{
1521        struct cpl_abort_req_rss *req = cplhdr(skb);
1522        struct iwch_ep *ep = ctx;
1523        struct cpl_abort_rpl *rpl;
1524        struct sk_buff *rpl_skb;
1525        struct iwch_qp_attributes attrs;
1526        int ret;
1527        int release = 0;
1528        unsigned long flags;
1529
1530        if (is_neg_adv_abort(req->status)) {
1531                pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
1532                         ep->hwtid);
1533                t3_l2t_send_event(ep->com.tdev, ep->l2t);
1534                return CPL_RET_BUF_DONE;
1535        }
1536
1537        /*
1538         * The HW delivers two peer aborts per connection.  Ignore the
1539         * first one, recording only that a second one is expected.
1540         */
1541        if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
1542                return CPL_RET_BUF_DONE;
1544
1545        spin_lock_irqsave(&ep->com.lock, flags);
1546        pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
1547        switch (ep->com.state) {
1548        case CONNECTING:
1549                break;
1550        case MPA_REQ_WAIT:
1551                stop_ep_timer(ep);
1552                break;
1553        case MPA_REQ_SENT:
1554                stop_ep_timer(ep);
1555                connect_reply_upcall(ep, -ECONNRESET);
1556                break;
1557        case MPA_REP_SENT:
1558                ep->com.rpl_done = 1;
1559                ep->com.rpl_err = -ECONNRESET;
1560                pr_debug("waking up ep %p\n", ep);
1561                wake_up(&ep->com.waitq);
1562                break;
1563        case MPA_REQ_RCVD:
1564
1565                /*
1566                 * This endpoint will be marked DEAD, but keep the
1567                 * reference on it until the ULP accepts or rejects
1568                 * the CR, and wake up anyone waiting in rdma
1569                 * connection migration (see iwch_accept_cr()).
1570                 */
1571                ep->com.rpl_done = 1;
1572                ep->com.rpl_err = -ECONNRESET;
1573                pr_debug("waking up ep %p\n", ep);
1574                wake_up(&ep->com.waitq);
1575                break;
1576        case MORIBUND:
1577        case CLOSING:
1578                stop_ep_timer(ep);
1579                /* fall through */
1580        case FPDU_MODE:
1581                if (ep->com.cm_id && ep->com.qp) {
1582                        attrs.next_state = IWCH_QP_STATE_ERROR;
1583                        ret = iwch_modify_qp(ep->com.qp->rhp,
1584                                     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1585                                     &attrs, 1);
1586                        if (ret)
1587                                pr_err("%s - move qp to ERROR failed\n", __func__);
1588                }
1589                peer_abort_upcall(ep);
1590                break;
1591        case ABORTING:
1592                break;
1593        case DEAD:
1594                pr_debug("%s peer abort arrived in DEAD state\n", __func__);
1595                spin_unlock_irqrestore(&ep->com.lock, flags);
1596                return CPL_RET_BUF_DONE;
1597        default:
1598                BUG_ON(1);
1599                break;
1600        }
1601        dst_confirm(ep->dst);
1602        if (ep->com.state != ABORTING) {
1603                __state_set(&ep->com, DEAD);
1604                release = 1;
1605        }
1606        spin_unlock_irqrestore(&ep->com.lock, flags);
1607
1608        rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
1609        if (!rpl_skb) {
1610                pr_err("%s - cannot allocate skb!\n", __func__);
1611                release = 1;
1612                goto out;
1613        }
1614        rpl_skb->priority = CPL_PRIORITY_DATA;
1615        rpl = skb_put(rpl_skb, sizeof(*rpl));
1616        rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
1617        rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
1618        OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
1619        rpl->cmd = CPL_ABORT_NO_RST;
1620        iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
1621out:
1622        if (release)
1623                release_ep_resources(ep);
1624        return CPL_RET_BUF_DONE;
1625}
1626
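/*
 * Handle CPL_CLOSE_CON_RPL, the reply to our close request.  In
 * CLOSING we still await the peer's half-close, so move to MORIBUND;
 * in MORIBUND the shutdown is complete, so idle the QP, signal the
 * close-complete upcall and release the endpoint.
 */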
1627static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1628{
1629        struct iwch_ep *ep = ctx;
1630        struct iwch_qp_attributes attrs;
1631        unsigned long flags;
1632        int release = 0;
1633
1634        pr_debug("%s ep %p\n", __func__, ep);
1635        BUG_ON(!ep);
1636
1637        /* The cm_id may be null if we failed to connect */
1638        spin_lock_irqsave(&ep->com.lock, flags);
1639        switch (ep->com.state) {
1640        case CLOSING:
1641                __state_set(&ep->com, MORIBUND);
1642                break;
1643        case MORIBUND:
1644                stop_ep_timer(ep);
1645                if ((ep->com.cm_id) && (ep->com.qp)) {
1646                        attrs.next_state = IWCH_QP_STATE_IDLE;
1647                        iwch_modify_qp(ep->com.qp->rhp,
1648                                             ep->com.qp,
1649                                             IWCH_QP_ATTR_NEXT_STATE,
1650                                             &attrs, 1);
1651                }
1652                close_complete_upcall(ep);
1653                __state_set(&ep->com, DEAD);
1654                release = 1;
1655                break;
1656        case ABORTING:
1657        case DEAD:
1658                break;
1659        default:
1660                BUG_ON(1);
1661                break;
1662        }
1663        spin_unlock_irqrestore(&ep->com.lock, flags);
1664        if (release)
1665                release_ep_resources(ep);
1666        return CPL_RET_BUF_DONE;
1667}
1668
1669/*
1670 * T3A does 3 things when a TERM is received:
1671 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
1672 * 2) generate an async event on the QP with the TERMINATE opcode
1673 * 3) post a TERMINATE opcode cqe into the associated CQ.
1674 *
1675 * For (1), we save the message in the qp for the consumer to read later.
1676 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
1677 * For (3), we toss the CQE in cxio_poll_cq().
1678 *
1679 * terminate() handles case (1)...
1680 */
1681static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1682{
1683        struct iwch_ep *ep = ctx;
1684
1685        if (state_read(&ep->com) != FPDU_MODE)
1686                return CPL_RET_BUF_DONE;
1687
1688        pr_debug("%s ep %p\n", __func__, ep);
1689        skb_pull(skb, sizeof(struct cpl_rdma_terminate));
1690        pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
1691        skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
1692                                  skb->len);
1693        ep->com.qp->attr.terminate_msg_len = skb->len;
1694        ep->com.qp->attr.is_terminate_local = 0;
1695        return CPL_RET_BUF_DONE;
1696}
1697
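/*
 * Handle CPL_RDMA_EC_STATUS.  A non-zero status means the graceful
 * close failed, so move the QP to ERROR and abort the connection.
 */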
1698static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1699{
1700        struct cpl_rdma_ec_status *rep = cplhdr(skb);
1701        struct iwch_ep *ep = ctx;
1702
1703        pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
1704                 rep->status);
1705        if (rep->status) {
1706                struct iwch_qp_attributes attrs;
1707
1708                pr_err("%s BAD CLOSE - Aborting tid %u\n",
1709                       __func__, ep->hwtid);
1710                stop_ep_timer(ep);
1711                attrs.next_state = IWCH_QP_STATE_ERROR;
1712                iwch_modify_qp(ep->com.qp->rhp,
1713                               ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1714                               &attrs, 1);
1715                abort_connection(ep, NULL, GFP_KERNEL);
1716        }
1717        return CPL_RET_BUF_DONE;
1718}
1719
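/*
 * Endpoint operation timer (armed for ep_timeout_secs).  Runs in
 * timer (atomic) context, hence the GFP_ATOMIC abort.  A timed-out
 * connect is failed with -ETIMEDOUT, a stuck close errors out the QP,
 * and in either case the endpoint moves to ABORTING and the timer's
 * endpoint reference is dropped.
 */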
1720static void ep_timeout(struct timer_list *t)
1721{
1722        struct iwch_ep *ep = from_timer(ep, t, timer);
1723        struct iwch_qp_attributes attrs;
1724        unsigned long flags;
1725        int abort = 1;
1726
1727        spin_lock_irqsave(&ep->com.lock, flags);
1728        pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
1729                 ep->com.state);
1730        switch (ep->com.state) {
1731        case MPA_REQ_SENT:
1732                __state_set(&ep->com, ABORTING);
1733                connect_reply_upcall(ep, -ETIMEDOUT);
1734                break;
1735        case MPA_REQ_WAIT:
1736                __state_set(&ep->com, ABORTING);
1737                break;
1738        case CLOSING:
1739        case MORIBUND:
1740                if (ep->com.cm_id && ep->com.qp) {
1741                        attrs.next_state = IWCH_QP_STATE_ERROR;
1742                        iwch_modify_qp(ep->com.qp->rhp,
1743                                     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1744                                     &attrs, 1);
1745                }
1746                __state_set(&ep->com, ABORTING);
1747                break;
1748        default:
1749                WARN(1, "%s unexpected state ep %p state %u\n",
1750                        __func__, ep, ep->com.state);
1751                abort = 0;
1752        }
1753        spin_unlock_irqrestore(&ep->com.lock, flags);
1754        if (abort)
1755                abort_connection(ep, NULL, GFP_ATOMIC);
1756        put_ep(&ep->com);
1757}
1758
1759int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
1760{
1761        struct iwch_ep *ep = to_ep(cm_id);
1762
1763        pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1764
1765        if (state_read(&ep->com) == DEAD) {
1766                put_ep(&ep->com);
1767                return -ECONNRESET;
1768        }
1769        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1770        if (mpa_rev == 0) {
1771                abort_connection(ep, NULL, GFP_KERNEL);
1772        } else {
1773                send_mpa_reject(ep, pdata, pdata_len);
1774                iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1775        }
1776        put_ep(&ep->com);
1777        return 0;
1778}
1779
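/*
 * Accept a connection request on behalf of the ULP: validate the
 * requested IRD/ORD against the device limits, bind the QP to the
 * endpoint and move it to RTS, wait for the WR ack if RQEs were
 * already posted, then send the MPA reply and enter FPDU_MODE.
 */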
1780int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1781{
1782        int err;
1783        struct iwch_qp_attributes attrs;
1784        enum iwch_qp_attr_mask mask;
1785        struct iwch_ep *ep = to_ep(cm_id);
1786        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1787        struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
1788
1789        pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1790        if (state_read(&ep->com) == DEAD) {
1791                err = -ECONNRESET;
1792                goto err;
1793        }
1794
1795        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1796        BUG_ON(!qp);
1797
1798        if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
1799            (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
1800                abort_connection(ep, NULL, GFP_KERNEL);
1801                err = -EINVAL;
1802                goto err;
1803        }
1804
1805        cm_id->add_ref(cm_id);
1806        ep->com.cm_id = cm_id;
1807        ep->com.qp = qp;
1808
1809        ep->ird = conn_param->ird;
1810        ep->ord = conn_param->ord;
1811
1812        if (peer2peer && ep->ird == 0)
1813                ep->ird = 1;
1814
1815        pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
1816
1817        /* bind QP to EP and move to RTS */
1818        attrs.mpa_attr = ep->mpa_attr;
1819        attrs.max_ird = ep->ird;
1820        attrs.max_ord = ep->ord;
1821        attrs.llp_stream_handle = ep;
1822        attrs.next_state = IWCH_QP_STATE_RTS;
1823
1824        /* bind QP and TID with INIT_WR */
1825        mask = IWCH_QP_ATTR_NEXT_STATE |
1826                             IWCH_QP_ATTR_LLP_STREAM_HANDLE |
1827                             IWCH_QP_ATTR_MPA_ATTR |
1828                             IWCH_QP_ATTR_MAX_IRD |
1829                             IWCH_QP_ATTR_MAX_ORD;
1830
1831        err = iwch_modify_qp(ep->com.qp->rhp,
1832                             ep->com.qp, mask, &attrs, 1);
1833        if (err)
1834                goto err1;
1835
1836        /* if needed, wait for wr_ack */
1837        if (iwch_rqes_posted(qp)) {
1838                wait_event(ep->com.waitq, ep->com.rpl_done);
1839                err = ep->com.rpl_err;
1840                if (err)
1841                        goto err1;
1842        }
1843
1844        err = send_mpa_reply(ep, conn_param->private_data,
1845                             conn_param->private_data_len);
1846        if (err)
1847                goto err1;
1848
1850        state_set(&ep->com, FPDU_MODE);
1851        established_upcall(ep);
1852        put_ep(&ep->com);
1853        return 0;
1854err1:
1855        ep->com.cm_id = NULL;
1856        ep->com.qp = NULL;
1857        cm_id->rem_ref(cm_id);
1858err:
1859        put_ep(&ep->com);
1860        return err;
1861}
1862
1863static int is_loopback_dst(struct iw_cm_id *cm_id)
1864{
1865        struct net_device *dev;
1866        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
1867
1868        dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
1869        if (!dev)
1870                return 0;
1871        dev_put(dev);
1872        return 1;
1873}
1874
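/*
 * Initiate an active open: allocate an endpoint and an active TID,
 * resolve a route and an L2T entry to the peer, and send the connect
 * request to the RNIC.  Only IPv4, non-loopback destinations are
 * supported.
 */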
1875int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1876{
1877        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1878        struct iwch_ep *ep;
1879        struct rtable *rt;
1880        int err = 0;
1881        struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
1882        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
1883
1884        if (cm_id->m_remote_addr.ss_family != PF_INET) {
1885                err = -ENOSYS;
1886                goto out;
1887        }
1888
1889        if (is_loopback_dst(cm_id)) {
1890                err = -ENOSYS;
1891                goto out;
1892        }
1893
1894        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1895        if (!ep) {
1896                pr_err("%s - cannot alloc ep\n", __func__);
1897                err = -ENOMEM;
1898                goto out;
1899        }
1900        timer_setup(&ep->timer, ep_timeout, 0);
1901        ep->plen = conn_param->private_data_len;
1902        if (ep->plen)
1903                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
1904                       conn_param->private_data, ep->plen);
1905        ep->ird = conn_param->ird;
1906        ep->ord = conn_param->ord;
1907
1908        if (peer2peer && ep->ord == 0)
1909                ep->ord = 1;
1910
1911        ep->com.tdev = h->rdev.t3cdev_p;
1912
1913        cm_id->add_ref(cm_id);
1914        ep->com.cm_id = cm_id;
1915        ep->com.qp = get_qhp(h, conn_param->qpn);
1916        BUG_ON(!ep->com.qp);
1917        pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
1918                 ep->com.qp, cm_id);
1919
1920        /*
1921         * Allocate an active TID to initiate a TCP connection.
1922         */
1923        ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
1924        if (ep->atid == -1) {
1925                pr_err("%s - cannot alloc atid\n", __func__);
1926                err = -ENOMEM;
1927                goto fail2;
1928        }
1929
1930        /* find a route */
1931        rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
1932                        raddr->sin_addr.s_addr, laddr->sin_port,
1933                        raddr->sin_port, IPTOS_LOWDELAY);
1934        if (!rt) {
1935                pr_err("%s - cannot find route\n", __func__);
1936                err = -EHOSTUNREACH;
1937                goto fail3;
1938        }
1939        ep->dst = &rt->dst;
1940        ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
1941                             &raddr->sin_addr.s_addr);
1942        if (!ep->l2t) {
1943                pr_err("%s - cannot alloc l2e\n", __func__);
1944                err = -ENOMEM;
1945                goto fail4;
1946        }
1947
1948        state_set(&ep->com, CONNECTING);
1949        ep->tos = IPTOS_LOWDELAY;
1950        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
1951               sizeof(ep->com.local_addr));
1952        memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
1953               sizeof(ep->com.remote_addr));
1954
1955        /* send connect request to rnic */
1956        err = send_connect(ep);
1957        if (!err)
1958                goto out;
1959
1960        l2t_release(h->rdev.t3cdev_p, ep->l2t);
1961fail4:
1962        dst_release(ep->dst);
1963fail3:
1964        cxgb3_free_atid(ep->com.tdev, ep->atid);
1965fail2:
1966        cm_id->rem_ref(cm_id);
1967        put_ep(&ep->com);
1968out:
1969        return err;
1970}
1971
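/*
 * Create a listening endpoint: allocate a server TID, start the
 * listen, and sleep until the PASS_OPEN_RPL reports the result.
 */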
1972int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
1973{
1974        int err = 0;
1975        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1976        struct iwch_listen_ep *ep;
1977
1979        might_sleep();
1980
1981        if (cm_id->m_local_addr.ss_family != PF_INET) {
1982                err = -ENOSYS;
1983                goto fail1;
1984        }
1985
1986        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1987        if (!ep) {
1988                pr_err("%s - cannot alloc ep\n", __func__);
1989                err = -ENOMEM;
1990                goto fail1;
1991        }
1992        pr_debug("%s ep %p\n", __func__, ep);
1993        ep->com.tdev = h->rdev.t3cdev_p;
1994        cm_id->add_ref(cm_id);
1995        ep->com.cm_id = cm_id;
1996        ep->backlog = backlog;
1997        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
1998               sizeof(ep->com.local_addr));
1999
2000        /*
2001         * Allocate a server TID.
2002         */
2003        ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
2004        if (ep->stid == -1) {
2005                pr_err("%s - cannot alloc stid\n", __func__);
2006                err = -ENOMEM;
2007                goto fail2;
2008        }
2009
2010        state_set(&ep->com, LISTEN);
2011        err = listen_start(ep);
2012        if (err)
2013                goto fail3;
2014
2015        /* wait for pass_open_rpl */
2016        wait_event(ep->com.waitq, ep->com.rpl_done);
2017        err = ep->com.rpl_err;
2018        if (!err) {
2019                cm_id->provider_data = ep;
2020                goto out;
2021        }
2022fail3:
2023        cxgb3_free_stid(ep->com.tdev, ep->stid);
2024fail2:
2025        cm_id->rem_ref(cm_id);
2026        put_ep(&ep->com);
2027fail1:
2028out:
2029        return err;
2030}
2031
2032int iwch_destroy_listen(struct iw_cm_id *cm_id)
2033{
2034        int err;
2035        struct iwch_listen_ep *ep = to_listen_ep(cm_id);
2036
2037        pr_debug("%s ep %p\n", __func__, ep);
2038
2039        might_sleep();
2040        state_set(&ep->com, DEAD);
2041        ep->com.rpl_done = 0;
2042        ep->com.rpl_err = 0;
2043        err = listen_stop(ep);
2044        if (err)
2045                goto done;
2046        wait_event(ep->com.waitq, ep->com.rpl_done);
2047        cxgb3_free_stid(ep->com.tdev, ep->stid);
2048        err = ep->com.rpl_err;
2049done:
2050        cm_id->rem_ref(cm_id);
2051        put_ep(&ep->com);
2052        return err;
2053}
2054
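/*
 * Tear down a connection.  An abrupt disconnect sends an abort, while
 * a graceful one sends a half-close and arms the endpoint timer.  If
 * the rdev has taken a fatal error, or the close message cannot be
 * sent, the endpoint resources are released immediately.
 */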
2055int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
2056{
2057        int ret = 0;
2058        unsigned long flags;
2059        int close = 0;
2060        int fatal = 0;
2061        struct t3cdev *tdev;
2062        struct cxio_rdev *rdev;
2063
2064        spin_lock_irqsave(&ep->com.lock, flags);
2065
2066        pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
2067                 states[ep->com.state], abrupt);
2068
2069        tdev = (struct t3cdev *)ep->com.tdev;
2070        rdev = (struct cxio_rdev *)tdev->ulp;
2071        if (cxio_fatal_error(rdev)) {
2072                fatal = 1;
2073                close_complete_upcall(ep);
2074                ep->com.state = DEAD;
2075        }
2076        switch (ep->com.state) {
2077        case MPA_REQ_WAIT:
2078        case MPA_REQ_SENT:
2079        case MPA_REQ_RCVD:
2080        case MPA_REP_SENT:
2081        case FPDU_MODE:
2082                close = 1;
2083                if (abrupt) {
2084                        ep->com.state = ABORTING;
2085                } else {
2086                        ep->com.state = CLOSING;
2087                        start_ep_timer(ep);
2088                }
2089                set_bit(CLOSE_SENT, &ep->com.flags);
2090                break;
2091        case CLOSING:
2092                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2093                        close = 1;
2094                        if (abrupt) {
2095                                stop_ep_timer(ep);
2096                                ep->com.state = ABORTING;
2097                        } else
2098                                ep->com.state = MORIBUND;
2099                }
2100                break;
2101        case MORIBUND:
2102        case ABORTING:
2103        case DEAD:
2104                pr_debug("%s ignoring disconnect ep %p state %u\n",
2105                         __func__, ep, ep->com.state);
2106                break;
2107        default:
2108                BUG();
2109                break;
2110        }
2111
2112        spin_unlock_irqrestore(&ep->com.lock, flags);
2113        if (close) {
2114                if (abrupt)
2115                        ret = send_abort(ep, NULL, gfp);
2116                else
2117                        ret = send_halfclose(ep, gfp);
2118                if (ret)
2119                        fatal = 1;
2120        }
2121        if (fatal)
2122                release_ep_resources(ep);
2123        return ret;
2124}
2125
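/*
 * Called on a netevent route change to move an endpoint over to a new
 * dst and L2T entry.  Returns 1 if this endpoint was redirected.
 */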
2126int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2127                     struct l2t_entry *l2t)
2128{
2129        struct iwch_ep *ep = ctx;
2130
2131        if (ep->dst != old)
2132                return 0;
2133
2134        pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2135                 l2t);
2136        dst_hold(new);
2137        l2t_release(ep->com.tdev, ep->l2t);
2138        ep->l2t = l2t;
2139        dst_release(old);
2140        ep->dst = new;
2141        return 1;
2142}
2143
2144/*
2145 * All the CM events are handled on a work queue to have a safe context.
2146 * These are the real handlers that are called from the work queue.
2147 */
2148static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
2149        [CPL_ACT_ESTABLISH]     = act_establish,
2150        [CPL_ACT_OPEN_RPL]      = act_open_rpl,
2151        [CPL_RX_DATA]           = rx_data,
2152        [CPL_TX_DMA_ACK]        = tx_ack,
2153        [CPL_ABORT_RPL_RSS]     = abort_rpl,
2154        [CPL_ABORT_RPL]         = abort_rpl,
2155        [CPL_PASS_OPEN_RPL]     = pass_open_rpl,
2156        [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
2157        [CPL_PASS_ACCEPT_REQ]   = pass_accept_req,
2158        [CPL_PASS_ESTABLISH]    = pass_establish,
2159        [CPL_PEER_CLOSE]        = peer_close,
2160        [CPL_ABORT_REQ_RSS]     = peer_abort,
2161        [CPL_CLOSE_CON_RPL]     = close_con_rpl,
2162        [CPL_RDMA_TERMINATE]    = terminate,
2163        [CPL_RDMA_EC_STATUS]    = ec_status,
2164};
2165
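/*
 * Dequeue the deferred CPL messages, dispatch each one to its handler
 * based on the opcode carried in skb->csum, free the skb when the
 * handler returns CPL_RET_BUF_DONE, and drop the endpoint reference
 * taken in sched().
 */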
2166static void process_work(struct work_struct *work)
2167{
2168        struct sk_buff *skb = NULL;
2169        void *ep;
2170        struct t3cdev *tdev;
2171        int ret;
2172
2173        while ((skb = skb_dequeue(&rxq))) {
2174                ep = *((void **) (skb->cb));
2175                tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
2176                ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
2177                if (ret & CPL_RET_BUF_DONE)
2178                        kfree_skb(skb);
2179
2180                /*
2181                 * ep was referenced in sched(), and is freed here.
2182                 */
2183                put_ep((struct iwch_ep_common *)ep);
2184        }
2185}
2186
2187static DECLARE_WORK(skb_work, process_work);
2188
2189static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2190{
2191        struct iwch_ep_common *epc = ctx;
2192
2193        get_ep(epc);
2194
2195        /*
2196         * Save ctx and tdev in the skb->cb area.
2197         */
2198        *((void **) skb->cb) = ctx;
2199        *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
2200
2201        /*
2202         * Queue the skb and schedule the worker thread.
2203         */
2204        skb_queue_tail(&rxq, skb);
2205        queue_work(workq, &skb_work);
2206        return 0;
2207}
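/*
 * The raw skb->cb stores above could equally be written with a small
 * overlay struct.  A minimal sketch (illustrative only; neither
 * struct iwch_skb_cb nor IWCH_SKB_CB exists in this driver):
 *
 *        struct iwch_skb_cb {
 *                void *ctx;
 *                struct t3cdev *tdev;
 *        };
 *        #define IWCH_SKB_CB(skb) ((struct iwch_skb_cb *)(skb)->cb)
 *
 *        IWCH_SKB_CB(skb)->ctx = ctx;
 *        IWCH_SKB_CB(skb)->tdev = tdev;
 *
 * Either layout must fit in the 48 bytes of skb->cb.
 */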
2208
2209static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2210{
2211        struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
2212
2213        if (rpl->status != CPL_ERR_NONE) {
2214                pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
2215                       rpl->status, GET_TID(rpl));
2216        }
2217        return CPL_RET_BUF_DONE;
2218}
2219
2220/*
2221 * All upcalls from the T3 Core go to sched() to schedule the
2222 * processing on a work queue.
2223 */
2224cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
2225        [CPL_ACT_ESTABLISH]     = sched,
2226        [CPL_ACT_OPEN_RPL]      = sched,
2227        [CPL_RX_DATA]           = sched,
2228        [CPL_TX_DMA_ACK]        = sched,
2229        [CPL_ABORT_RPL_RSS]     = sched,
2230        [CPL_ABORT_RPL]         = sched,
2231        [CPL_PASS_OPEN_RPL]     = sched,
2232        [CPL_CLOSE_LISTSRV_RPL] = sched,
2233        [CPL_PASS_ACCEPT_REQ]   = sched,
2234        [CPL_PASS_ESTABLISH]    = sched,
2235        [CPL_PEER_CLOSE]        = sched,
2236        [CPL_CLOSE_CON_RPL]     = sched,
2237        [CPL_ABORT_REQ_RSS]     = sched,
2238        [CPL_RDMA_TERMINATE]    = sched,
2239        [CPL_RDMA_EC_STATUS]    = sched,
2240        [CPL_SET_TCB_RPL]       = set_tcb_rpl,
2241};
2242
2243int __init iwch_cm_init(void)
2244{
2245        skb_queue_head_init(&rxq);
2246
2247        workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM);
2248        if (!workq)
2249                return -ENOMEM;
2250
2251        return 0;
2252}
2253
2254void __exit iwch_cm_term(void)
2255{
2256        flush_workqueue(workq);
2257        destroy_workqueue(workq);
2258}
2259