linux/drivers/infiniband/hw/cxgb3/iwch_cm.c
   1/*
   2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/list.h>
  34#include <linux/slab.h>
  35#include <linux/workqueue.h>
  36#include <linux/skbuff.h>
  37#include <linux/timer.h>
  38#include <linux/notifier.h>
  39#include <linux/inetdevice.h>
  40
  41#include <net/neighbour.h>
  42#include <net/netevent.h>
  43#include <net/route.h>
  44
  45#include "tcb.h"
  46#include "cxgb3_offload.h"
  47#include "iwch.h"
  48#include "iwch_provider.h"
  49#include "iwch_cm.h"
  50
  51static char *states[] = {
  52        "idle",
  53        "listen",
  54        "connecting",
  55        "mpa_wait_req",
  56        "mpa_req_sent",
  57        "mpa_req_rcvd",
  58        "mpa_rep_sent",
  59        "fpdu_mode",
  60        "aborting",
  61        "closing",
  62        "moribund",
  63        "dead",
  64        NULL,
  65};
  66
  67int peer2peer = 0;
  68module_param(peer2peer, int, 0644);
  69MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
  70
  71static int ep_timeout_secs = 60;
  72module_param(ep_timeout_secs, int, 0644);
  73MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
  74                                   "in seconds (default=60)");
  75
  76static int mpa_rev = 1;
  77module_param(mpa_rev, int, 0644);
  78MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
  79                 "1 is spec compliant. (default=1)");
  80
  81static int markers_enabled = 0;
  82module_param(markers_enabled, int, 0644);
  83MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
  84
  85static int crc_enabled = 1;
  86module_param(crc_enabled, int, 0644);
  87MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
  88
  89static int rcv_win = 256 * 1024;
  90module_param(rcv_win, int, 0644);
   91MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
  92
  93static int snd_win = 32 * 1024;
  94module_param(snd_win, int, 0644);
  95MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
  96
  97static unsigned int nocong = 0;
  98module_param(nocong, uint, 0644);
  99MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
 100
 101static unsigned int cong_flavor = 1;
 102module_param(cong_flavor, uint, 0644);
 103MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
 104
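/*
 * All of the tunables above are module parameters.  A load-time override
 * might look like this (illustrative only: the module name iw_cxgb3 is
 * assumed from this driver's build, and the values shown restate the
 * defaults):
 *
 *   modprobe iw_cxgb3 mpa_rev=1 crc_enabled=1 markers_enabled=0 rcv_win=262144
 */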
 105static struct workqueue_struct *workq;
 106
 107static struct sk_buff_head rxq;
 108
 109static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
 110static void ep_timeout(unsigned long arg);
 111static void connect_reply_upcall(struct iwch_ep *ep, int status);
 112
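/*
 * Endpoint timer helpers: start_ep_timer() takes a reference on the ep the
 * first time the timer is armed and simply re-arms a timer that is already
 * pending; stop_ep_timer() cancels the timer and drops that reference.
 */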
 113static void start_ep_timer(struct iwch_ep *ep)
 114{
 115        pr_debug("%s ep %p\n", __func__, ep);
 116        if (timer_pending(&ep->timer)) {
 117                pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
 118                del_timer_sync(&ep->timer);
 119        } else
 120                get_ep(&ep->com);
 121        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
 122        ep->timer.data = (unsigned long)ep;
 123        ep->timer.function = ep_timeout;
 124        add_timer(&ep->timer);
 125}
 126
 127static void stop_ep_timer(struct iwch_ep *ep)
 128{
 129        pr_debug("%s ep %p\n", __func__, ep);
 130        if (!timer_pending(&ep->timer)) {
  131                WARN(1, "%s timer stopped when it's not running!  ep %p state %u\n",
 132                        __func__, ep, ep->com.state);
 133                return;
 134        }
 135        del_timer_sync(&ep->timer);
 136        put_ep(&ep->com);
 137}
 138
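/*
 * Thin wrappers around the cxgb3 offload transmit paths.  Both check for a
 * fatal adapter error first and free the skb on any failure, so callers
 * never need to free it themselves.
 */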
 139static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
 140{
 141        int     error = 0;
 142        struct cxio_rdev *rdev;
 143
 144        rdev = (struct cxio_rdev *)tdev->ulp;
 145        if (cxio_fatal_error(rdev)) {
 146                kfree_skb(skb);
 147                return -EIO;
 148        }
 149        error = l2t_send(tdev, skb, l2e);
 150        if (error < 0)
 151                kfree_skb(skb);
 152        return error < 0 ? error : 0;
 153}
 154
 155int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
 156{
 157        int     error = 0;
 158        struct cxio_rdev *rdev;
 159
 160        rdev = (struct cxio_rdev *)tdev->ulp;
 161        if (cxio_fatal_error(rdev)) {
 162                kfree_skb(skb);
 163                return -EIO;
 164        }
 165        error = cxgb3_ofld_send(tdev, skb);
 166        if (error < 0)
 167                kfree_skb(skb);
 168        return error < 0 ? error : 0;
 169}
 170
 171static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
 172{
 173        struct cpl_tid_release *req;
 174
 175        skb = get_skb(skb, sizeof *req, GFP_KERNEL);
 176        if (!skb)
 177                return;
 178        req = skb_put(skb, sizeof(*req));
 179        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 180        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
 181        skb->priority = CPL_PRIORITY_SETUP;
 182        iwch_cxgb3_ofld_send(tdev, skb);
 183        return;
 184}
 185
 186int iwch_quiesce_tid(struct iwch_ep *ep)
 187{
 188        struct cpl_set_tcb_field *req;
 189        struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 190
 191        if (!skb)
 192                return -ENOMEM;
 193        req = skb_put(skb, sizeof(*req));
 194        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 195        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 196        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
 197        req->reply = 0;
 198        req->cpu_idx = 0;
 199        req->word = htons(W_TCB_RX_QUIESCE);
 200        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
 201        req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
 202
 203        skb->priority = CPL_PRIORITY_DATA;
 204        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 205}
 206
 207int iwch_resume_tid(struct iwch_ep *ep)
 208{
 209        struct cpl_set_tcb_field *req;
 210        struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 211
 212        if (!skb)
 213                return -ENOMEM;
 214        req = skb_put(skb, sizeof(*req));
 215        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 216        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 217        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
 218        req->reply = 0;
 219        req->cpu_idx = 0;
 220        req->word = htons(W_TCB_RX_QUIESCE);
 221        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
 222        req->val = 0;
 223
 224        skb->priority = CPL_PRIORITY_DATA;
 225        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 226}
 227
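/*
 * Derive the effective MSS from the negotiated TCP options: take the MTU
 * table entry selected by the peer's MSS index, subtract 40 bytes for the
 * IP and TCP headers, subtract another 12 bytes if timestamps are in use,
 * and never go below 128.  A 1500-byte MTU entry, for example, gives an
 * emss of 1460, or 1448 with timestamps.
 */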
 228static void set_emss(struct iwch_ep *ep, u16 opt)
 229{
 230        pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
 231        ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
 232        if (G_TCPOPT_TSTAMP(opt))
 233                ep->emss -= 12;
 234        if (ep->emss < 128)
 235                ep->emss = 128;
 236        pr_debug("emss=%d\n", ep->emss);
 237}
 238
 239static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
 240{
 241        unsigned long flags;
 242        enum iwch_ep_state state;
 243
 244        spin_lock_irqsave(&epc->lock, flags);
 245        state = epc->state;
 246        spin_unlock_irqrestore(&epc->lock, flags);
 247        return state;
 248}
 249
 250static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
 251{
 252        epc->state = new;
 253}
 254
 255static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
 256{
 257        unsigned long flags;
 258
 259        spin_lock_irqsave(&epc->lock, flags);
 260        pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
 261        __state_set(epc, new);
 262        spin_unlock_irqrestore(&epc->lock, flags);
 263        return;
 264}
 265
 266static void *alloc_ep(int size, gfp_t gfp)
 267{
 268        struct iwch_ep_common *epc;
 269
 270        epc = kzalloc(size, gfp);
 271        if (epc) {
 272                kref_init(&epc->kref);
 273                spin_lock_init(&epc->lock);
 274                init_waitqueue_head(&epc->waitq);
 275        }
 276        pr_debug("%s alloc ep %p\n", __func__, epc);
 277        return epc;
 278}
 279
 280void __free_ep(struct kref *kref)
 281{
 282        struct iwch_ep *ep;
 283        ep = container_of(container_of(kref, struct iwch_ep_common, kref),
 284                          struct iwch_ep, com);
 285        pr_debug("%s ep %p state %s\n",
 286                 __func__, ep, states[state_read(&ep->com)]);
 287        if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 288                cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
 289                dst_release(ep->dst);
 290                l2t_release(ep->com.tdev, ep->l2t);
 291        }
 292        kfree(ep);
 293}
 294
 295static void release_ep_resources(struct iwch_ep *ep)
 296{
 297        pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 298        set_bit(RELEASE_RESOURCES, &ep->com.flags);
 299        put_ep(&ep->com);
 300}
 301
 302static int status2errno(int status)
 303{
 304        switch (status) {
 305        case CPL_ERR_NONE:
 306                return 0;
 307        case CPL_ERR_CONN_RESET:
 308                return -ECONNRESET;
 309        case CPL_ERR_ARP_MISS:
 310                return -EHOSTUNREACH;
 311        case CPL_ERR_CONN_TIMEDOUT:
 312                return -ETIMEDOUT;
 313        case CPL_ERR_TCAM_FULL:
 314                return -ENOMEM;
 315        case CPL_ERR_CONN_EXIST:
 316                return -EADDRINUSE;
 317        default:
 318                return -EIO;
 319        }
 320}
 321
 322/*
 323 * Try and reuse skbs already allocated...
 324 */
 325static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 326{
 327        if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
 328                skb_trim(skb, 0);
 329                skb_get(skb);
 330        } else {
 331                skb = alloc_skb(len, gfp);
 332        }
 333        return skb;
 334}
 335
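/*
 * Resolve an IPv4 route for the connection 4-tuple and TOS; returns NULL
 * if no route can be found.
 */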
 336static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
 337                                 __be32 peer_ip, __be16 local_port,
 338                                 __be16 peer_port, u8 tos)
 339{
 340        struct rtable *rt;
 341        struct flowi4 fl4;
 342
 343        rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
 344                                   peer_port, local_port, IPPROTO_TCP,
 345                                   tos, 0);
 346        if (IS_ERR(rt))
 347                return NULL;
 348        return rt;
 349}
 350
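/*
 * Pick the index of the largest entry in the adapter's MTU table that does
 * not exceed the path MTU.
 */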
 351static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
 352{
 353        int i = 0;
 354
 355        while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
 356                ++i;
 357        return i;
 358}
 359
 360static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
 361{
 362        pr_debug("%s t3cdev %p\n", __func__, dev);
 363        kfree_skb(skb);
 364}
 365
 366/*
 367 * Handle an ARP failure for an active open.
 368 */
 369static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
 370{
 371        pr_err("ARP failure during connect\n");
 372        kfree_skb(skb);
 373}
 374
 375/*
 376 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 377 * and send it along.
 378 */
 379static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
 380{
 381        struct cpl_abort_req *req = cplhdr(skb);
 382
 383        pr_debug("%s t3cdev %p\n", __func__, dev);
 384        req->cmd = CPL_ABORT_NO_RST;
 385        iwch_cxgb3_ofld_send(dev, skb);
 386}
 387
 388static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
 389{
 390        struct cpl_close_con_req *req;
 391        struct sk_buff *skb;
 392
 393        pr_debug("%s ep %p\n", __func__, ep);
 394        skb = get_skb(NULL, sizeof(*req), gfp);
 395        if (!skb) {
 396                pr_err("%s - failed to alloc skb\n", __func__);
 397                return -ENOMEM;
 398        }
 399        skb->priority = CPL_PRIORITY_DATA;
 400        set_arp_failure_handler(skb, arp_failure_discard);
 401        req = skb_put(skb, sizeof(*req));
 402        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
 403        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 404        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
 405        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 406}
 407
 408static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
 409{
 410        struct cpl_abort_req *req;
 411
 412        pr_debug("%s ep %p\n", __func__, ep);
 413        skb = get_skb(skb, sizeof(*req), gfp);
 414        if (!skb) {
 415                pr_err("%s - failed to alloc skb\n", __func__);
 416                return -ENOMEM;
 417        }
 418        skb->priority = CPL_PRIORITY_DATA;
 419        set_arp_failure_handler(skb, abort_arp_failure);
 420        req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
 421        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
 422        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 423        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
 424        req->cmd = CPL_ABORT_SEND_RST;
 425        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 426}
 427
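/*
 * Build and send the CPL_ACT_OPEN_REQ that starts an active TCP open.  The
 * opt0/opt2 fields encode the window scale, MSS index, congestion-control
 * flavor and the L2T entry for this destination; an ARP failure on this
 * request is logged and the request dropped.
 */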
 428static int send_connect(struct iwch_ep *ep)
 429{
 430        struct cpl_act_open_req *req;
 431        struct sk_buff *skb;
 432        u32 opt0h, opt0l, opt2;
 433        unsigned int mtu_idx;
 434        int wscale;
 435
 436        pr_debug("%s ep %p\n", __func__, ep);
 437
 438        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 439        if (!skb) {
 440                pr_err("%s - failed to alloc skb\n", __func__);
 441                return -ENOMEM;
 442        }
 443        mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
 444        wscale = compute_wscale(rcv_win);
 445        opt0h = V_NAGLE(0) |
 446            V_NO_CONG(nocong) |
 447            V_KEEP_ALIVE(1) |
 448            F_TCAM_BYPASS |
 449            V_WND_SCALE(wscale) |
 450            V_MSS_IDX(mtu_idx) |
 451            V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
 452        opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
 453        opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
 454               V_CONG_CONTROL_FLAVOR(cong_flavor);
 455        skb->priority = CPL_PRIORITY_SETUP;
 456        set_arp_failure_handler(skb, act_open_req_arp_failure);
 457
 458        req = skb_put(skb, sizeof(*req));
 459        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 460        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
 461        req->local_port = ep->com.local_addr.sin_port;
 462        req->peer_port = ep->com.remote_addr.sin_port;
 463        req->local_ip = ep->com.local_addr.sin_addr.s_addr;
 464        req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
 465        req->opt0h = htonl(opt0h);
 466        req->opt0l = htonl(opt0l);
 467        req->params = 0;
 468        req->opt2 = htonl(opt2);
 469        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 470}
 471
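/*
 * Send the MPA request as streaming-mode TX data.  The message is prefixed
 * with a tx_data_wr work request, a reference to the skb is kept in
 * ep->mpa_skb until the hardware acks the transmit (see tx_ack()), and the
 * MPA timer is started before the endpoint moves to MPA_REQ_SENT.
 */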
 472static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 473{
 474        int mpalen;
 475        struct tx_data_wr *req;
 476        struct mpa_message *mpa;
 477        int len;
 478
 479        pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
 480
 481        BUG_ON(skb_cloned(skb));
 482
 483        mpalen = sizeof(*mpa) + ep->plen;
 484        if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
 485                kfree_skb(skb);
  486                skb = alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
 487                if (!skb) {
 488                        connect_reply_upcall(ep, -ENOMEM);
 489                        return;
 490                }
 491        }
 492        skb_trim(skb, 0);
 493        skb_reserve(skb, sizeof(*req));
 494        skb_put(skb, mpalen);
 495        skb->priority = CPL_PRIORITY_DATA;
 496        mpa = (struct mpa_message *) skb->data;
 497        memset(mpa, 0, sizeof(*mpa));
 498        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 499        mpa->flags = (crc_enabled ? MPA_CRC : 0) |
 500                     (markers_enabled ? MPA_MARKERS : 0);
 501        mpa->private_data_size = htons(ep->plen);
 502        mpa->revision = mpa_rev;
 503
 504        if (ep->plen)
 505                memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
 506
 507        /*
 508         * Reference the mpa skb.  This ensures the data area
 509         * will remain in memory until the hw acks the tx.
 510         * Function tx_ack() will deref it.
 511         */
 512        skb_get(skb);
 513        set_arp_failure_handler(skb, arp_failure_discard);
 514        skb_reset_transport_header(skb);
 515        len = skb->len;
 516        req = skb_push(skb, sizeof(*req));
 517        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 518        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 519        req->len = htonl(len);
 520        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 521                           V_TX_SNDBUF(snd_win>>15));
 522        req->flags = htonl(F_TX_INIT);
 523        req->sndseq = htonl(ep->snd_seq);
 524        BUG_ON(ep->mpa_skb);
 525        ep->mpa_skb = skb;
 526        iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 527        start_ep_timer(ep);
 528        state_set(&ep->com, MPA_REQ_SENT);
 529        return;
 530}
 531
 532static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 533{
 534        int mpalen;
 535        struct tx_data_wr *req;
 536        struct mpa_message *mpa;
 537        struct sk_buff *skb;
 538
 539        pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
 540
 541        mpalen = sizeof(*mpa) + plen;
 542
 543        skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
 544        if (!skb) {
 545                pr_err("%s - cannot alloc skb!\n", __func__);
 546                return -ENOMEM;
 547        }
 548        skb_reserve(skb, sizeof(*req));
 549        mpa = skb_put(skb, mpalen);
 550        memset(mpa, 0, sizeof(*mpa));
 551        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 552        mpa->flags = MPA_REJECT;
 553        mpa->revision = mpa_rev;
 554        mpa->private_data_size = htons(plen);
 555        if (plen)
 556                memcpy(mpa->private_data, pdata, plen);
 557
 558        /*
 559         * Reference the mpa skb again.  This ensures the data area
 560         * will remain in memory until the hw acks the tx.
 561         * Function tx_ack() will deref it.
 562         */
 563        skb_get(skb);
 564        skb->priority = CPL_PRIORITY_DATA;
 565        set_arp_failure_handler(skb, arp_failure_discard);
 566        skb_reset_transport_header(skb);
 567        req = skb_push(skb, sizeof(*req));
 568        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 569        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 570        req->len = htonl(mpalen);
 571        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 572                           V_TX_SNDBUF(snd_win>>15));
 573        req->flags = htonl(F_TX_INIT);
 574        req->sndseq = htonl(ep->snd_seq);
 575        BUG_ON(ep->mpa_skb);
 576        ep->mpa_skb = skb;
 577        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 578}
 579
 580static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 581{
 582        int mpalen;
 583        struct tx_data_wr *req;
 584        struct mpa_message *mpa;
 585        int len;
 586        struct sk_buff *skb;
 587
 588        pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
 589
 590        mpalen = sizeof(*mpa) + plen;
 591
 592        skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
 593        if (!skb) {
 594                pr_err("%s - cannot alloc skb!\n", __func__);
 595                return -ENOMEM;
 596        }
 597        skb->priority = CPL_PRIORITY_DATA;
 598        skb_reserve(skb, sizeof(*req));
 599        mpa = skb_put(skb, mpalen);
 600        memset(mpa, 0, sizeof(*mpa));
 601        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 602        mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
 603                     (markers_enabled ? MPA_MARKERS : 0);
 604        mpa->revision = mpa_rev;
 605        mpa->private_data_size = htons(plen);
 606        if (plen)
 607                memcpy(mpa->private_data, pdata, plen);
 608
 609        /*
 610         * Reference the mpa skb.  This ensures the data area
 611         * will remain in memory until the hw acks the tx.
 612         * Function tx_ack() will deref it.
 613         */
 614        skb_get(skb);
 615        set_arp_failure_handler(skb, arp_failure_discard);
 616        skb_reset_transport_header(skb);
 617        len = skb->len;
 618        req = skb_push(skb, sizeof(*req));
 619        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 620        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 621        req->len = htonl(len);
 622        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 623                           V_TX_SNDBUF(snd_win>>15));
 624        req->flags = htonl(F_TX_INIT);
 625        req->sndseq = htonl(ep->snd_seq);
 626        ep->mpa_skb = skb;
 627        state_set(&ep->com, MPA_REP_SENT);
 628        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 629}
 630
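/*
 * Active open established: record the hardware TID and initial sequence
 * numbers, set the effective MSS, free the ATID and kick off MPA
 * negotiation by sending the MPA request.
 */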
 631static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 632{
 633        struct iwch_ep *ep = ctx;
 634        struct cpl_act_establish *req = cplhdr(skb);
 635        unsigned int tid = GET_TID(req);
 636
 637        pr_debug("%s ep %p tid %d\n", __func__, ep, tid);
 638
 639        dst_confirm(ep->dst);
 640
 641        /* setup the hwtid for this connection */
 642        ep->hwtid = tid;
 643        cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
 644
 645        ep->snd_seq = ntohl(req->snd_isn);
 646        ep->rcv_seq = ntohl(req->rcv_isn);
 647
 648        set_emss(ep, ntohs(req->tcp_opt));
 649
 650        /* dealloc the atid */
 651        cxgb3_free_atid(ep->com.tdev, ep->atid);
 652
 653        /* start MPA negotiation */
 654        send_mpa_req(ep, skb);
 655
 656        return 0;
 657}
 658
 659static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
 660{
  661        pr_debug("%s ep %p\n", __func__, ep);
 662        state_set(&ep->com, ABORTING);
 663        send_abort(ep, skb, gfp);
 664}
 665
 666static void close_complete_upcall(struct iwch_ep *ep)
 667{
 668        struct iw_cm_event event;
 669
 670        pr_debug("%s ep %p\n", __func__, ep);
 671        memset(&event, 0, sizeof(event));
 672        event.event = IW_CM_EVENT_CLOSE;
 673        if (ep->com.cm_id) {
 674                pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
 675                         ep, ep->com.cm_id, ep->hwtid);
 676                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 677                ep->com.cm_id->rem_ref(ep->com.cm_id);
 678                ep->com.cm_id = NULL;
 679                ep->com.qp = NULL;
 680        }
 681}
 682
 683static void peer_close_upcall(struct iwch_ep *ep)
 684{
 685        struct iw_cm_event event;
 686
 687        pr_debug("%s ep %p\n", __func__, ep);
 688        memset(&event, 0, sizeof(event));
 689        event.event = IW_CM_EVENT_DISCONNECT;
 690        if (ep->com.cm_id) {
 691                pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
 692                         ep, ep->com.cm_id, ep->hwtid);
 693                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 694        }
 695}
 696
 697static void peer_abort_upcall(struct iwch_ep *ep)
 698{
 699        struct iw_cm_event event;
 700
 701        pr_debug("%s ep %p\n", __func__, ep);
 702        memset(&event, 0, sizeof(event));
 703        event.event = IW_CM_EVENT_CLOSE;
 704        event.status = -ECONNRESET;
 705        if (ep->com.cm_id) {
 706                pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
 707                         ep->com.cm_id, ep->hwtid);
 708                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 709                ep->com.cm_id->rem_ref(ep->com.cm_id);
 710                ep->com.cm_id = NULL;
 711                ep->com.qp = NULL;
 712        }
 713}
 714
 715static void connect_reply_upcall(struct iwch_ep *ep, int status)
 716{
 717        struct iw_cm_event event;
 718
 719        pr_debug("%s ep %p status %d\n", __func__, ep, status);
 720        memset(&event, 0, sizeof(event));
 721        event.event = IW_CM_EVENT_CONNECT_REPLY;
 722        event.status = status;
 723        memcpy(&event.local_addr, &ep->com.local_addr,
 724               sizeof(ep->com.local_addr));
 725        memcpy(&event.remote_addr, &ep->com.remote_addr,
 726               sizeof(ep->com.remote_addr));
 727
 728        if ((status == 0) || (status == -ECONNREFUSED)) {
 729                event.private_data_len = ep->plen;
 730                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 731        }
 732        if (ep->com.cm_id) {
 733                pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
 734                         ep->hwtid, status);
 735                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 736        }
 737        if (status < 0) {
 738                ep->com.cm_id->rem_ref(ep->com.cm_id);
 739                ep->com.cm_id = NULL;
 740                ep->com.qp = NULL;
 741        }
 742}
 743
 744static void connect_request_upcall(struct iwch_ep *ep)
 745{
 746        struct iw_cm_event event;
 747
 748        pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 749        memset(&event, 0, sizeof(event));
 750        event.event = IW_CM_EVENT_CONNECT_REQUEST;
 751        memcpy(&event.local_addr, &ep->com.local_addr,
 752               sizeof(ep->com.local_addr));
 753        memcpy(&event.remote_addr, &ep->com.remote_addr,
  754               sizeof(ep->com.remote_addr));
 755        event.private_data_len = ep->plen;
 756        event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 757        event.provider_data = ep;
 758        /*
 759         * Until ird/ord negotiation via MPAv2 support is added, send max
 760         * supported values
 761         */
 762        event.ird = event.ord = 8;
 763        if (state_read(&ep->parent_ep->com) != DEAD) {
 764                get_ep(&ep->com);
 765                ep->parent_ep->com.cm_id->event_handler(
 766                                                ep->parent_ep->com.cm_id,
 767                                                &event);
 768        }
 769        put_ep(&ep->parent_ep->com);
 770        ep->parent_ep = NULL;
 771}
 772
 773static void established_upcall(struct iwch_ep *ep)
 774{
 775        struct iw_cm_event event;
 776
 777        pr_debug("%s ep %p\n", __func__, ep);
 778        memset(&event, 0, sizeof(event));
 779        event.event = IW_CM_EVENT_ESTABLISHED;
 780        /*
 781         * Until ird/ord negotiation via MPAv2 support is added, send max
 782         * supported values
 783         */
 784        event.ird = event.ord = 8;
 785        if (ep->com.cm_id) {
 786                pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 787                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 788        }
 789}
 790
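/*
 * Return RX credits to the hardware with a CPL_RX_DATA_ACK so the TCP
 * receive window re-opens.  Returns the number of credits granted, or 0 if
 * no skb could be allocated.
 */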
 791static int update_rx_credits(struct iwch_ep *ep, u32 credits)
 792{
 793        struct cpl_rx_data_ack *req;
 794        struct sk_buff *skb;
 795
 796        pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
 797        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 798        if (!skb) {
 799                pr_err("update_rx_credits - cannot alloc skb!\n");
 800                return 0;
 801        }
 802
 803        req = skb_put(skb, sizeof(*req));
 804        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 805        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
 806        req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
 807        skb->priority = CPL_PRIORITY_ACK;
 808        iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 809        return credits;
 810}
 811
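/*
 * Streaming data received while in MPA_REQ_SENT: accumulate the peer's MPA
 * reply in ep->mpa_pkt, validate the header and private data length, then
 * either move the QP to RTS and enter FPDU_MODE or abort the connection.
 * A connect reply upcall is delivered in both cases.
 */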
 812static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 813{
 814        struct mpa_message *mpa;
 815        u16 plen;
 816        struct iwch_qp_attributes attrs;
 817        enum iwch_qp_attr_mask mask;
 818        int err;
 819
 820        pr_debug("%s ep %p\n", __func__, ep);
 821
 822        /*
 823         * Stop mpa timer.  If it expired, then the state has
 824         * changed and we bail since ep_timeout already aborted
 825         * the connection.
 826         */
 827        stop_ep_timer(ep);
 828        if (state_read(&ep->com) != MPA_REQ_SENT)
 829                return;
 830
 831        /*
 832         * If we get more than the supported amount of private data
 833         * then we must fail this connection.
 834         */
 835        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
 836                err = -EINVAL;
 837                goto err;
 838        }
 839
 840        /*
 841         * copy the new data into our accumulation buffer.
 842         */
 843        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
 844                                  skb->len);
 845        ep->mpa_pkt_len += skb->len;
 846
 847        /*
 848         * if we don't even have the mpa message, then bail.
 849         */
 850        if (ep->mpa_pkt_len < sizeof(*mpa))
 851                return;
 852        mpa = (struct mpa_message *) ep->mpa_pkt;
 853
 854        /* Validate MPA header. */
 855        if (mpa->revision != mpa_rev) {
 856                err = -EPROTO;
 857                goto err;
 858        }
 859        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
 860                err = -EPROTO;
 861                goto err;
 862        }
 863
 864        plen = ntohs(mpa->private_data_size);
 865
 866        /*
 867         * Fail if there's too much private data.
 868         */
 869        if (plen > MPA_MAX_PRIVATE_DATA) {
 870                err = -EPROTO;
 871                goto err;
 872        }
 873
 874        /*
 875         * If plen does not account for pkt size
 876         */
 877        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
 878                err = -EPROTO;
 879                goto err;
 880        }
 881
 882        ep->plen = (u8) plen;
 883
 884        /*
 885         * If we don't have all the pdata yet, then bail.
  886         * We'll continue processing when more data arrives.
 887         */
 888        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
 889                return;
 890
 891        if (mpa->flags & MPA_REJECT) {
 892                err = -ECONNREFUSED;
 893                goto err;
 894        }
 895
 896        /*
 897         * If we get here we have accumulated the entire mpa
 898         * start reply message including private data. And
 899         * the MPA header is valid.
 900         */
 901        state_set(&ep->com, FPDU_MODE);
 902        ep->mpa_attr.initiator = 1;
 903        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 904        ep->mpa_attr.recv_marker_enabled = markers_enabled;
 905        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 906        ep->mpa_attr.version = mpa_rev;
 907        pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
 908                 __func__,
 909                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 910                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
 911
 912        attrs.mpa_attr = ep->mpa_attr;
 913        attrs.max_ird = ep->ird;
 914        attrs.max_ord = ep->ord;
 915        attrs.llp_stream_handle = ep;
 916        attrs.next_state = IWCH_QP_STATE_RTS;
 917
 918        mask = IWCH_QP_ATTR_NEXT_STATE |
 919            IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
 920            IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
 921
 922        /* bind QP and TID with INIT_WR */
 923        err = iwch_modify_qp(ep->com.qp->rhp,
 924                             ep->com.qp, mask, &attrs, 1);
 925        if (err)
 926                goto err;
 927
 928        if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
 929                iwch_post_zb_read(ep);
 930        }
 931
 932        goto out;
 933err:
 934        abort_connection(ep, skb, GFP_KERNEL);
 935out:
 936        connect_reply_upcall(ep, err);
 937        return;
 938}
 939
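/*
 * Streaming data received while in MPA_REQ_WAIT: accumulate the peer's MPA
 * request in ep->mpa_pkt, validate it, and once the whole message
 * (including private data) has arrived move to MPA_REQ_RCVD and deliver a
 * connect request upcall to the listening endpoint's cm_id.
 */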
 940static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
 941{
 942        struct mpa_message *mpa;
 943        u16 plen;
 944
 945        pr_debug("%s ep %p\n", __func__, ep);
 946
 947        /*
 948         * Stop mpa timer.  If it expired, then the state has
 949         * changed and we bail since ep_timeout already aborted
 950         * the connection.
 951         */
 952        stop_ep_timer(ep);
 953        if (state_read(&ep->com) != MPA_REQ_WAIT)
 954                return;
 955
 956        /*
 957         * If we get more than the supported amount of private data
 958         * then we must fail this connection.
 959         */
 960        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
 961                abort_connection(ep, skb, GFP_KERNEL);
 962                return;
 963        }
 964
 965        pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
 966
 967        /*
 968         * Copy the new data into our accumulation buffer.
 969         */
 970        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
 971                                  skb->len);
 972        ep->mpa_pkt_len += skb->len;
 973
 974        /*
 975         * If we don't even have the mpa message, then bail.
  976         * We'll continue processing when more data arrives.
 977         */
 978        if (ep->mpa_pkt_len < sizeof(*mpa))
 979                return;
 980        pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
 981        mpa = (struct mpa_message *) ep->mpa_pkt;
 982
 983        /*
 984         * Validate MPA Header.
 985         */
 986        if (mpa->revision != mpa_rev) {
 987                abort_connection(ep, skb, GFP_KERNEL);
 988                return;
 989        }
 990
 991        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
 992                abort_connection(ep, skb, GFP_KERNEL);
 993                return;
 994        }
 995
 996        plen = ntohs(mpa->private_data_size);
 997
 998        /*
 999         * Fail if there's too much private data.
1000         */
1001        if (plen > MPA_MAX_PRIVATE_DATA) {
1002                abort_connection(ep, skb, GFP_KERNEL);
1003                return;
1004        }
1005
1006        /*
1007         * If plen does not account for pkt size
1008         */
1009        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1010                abort_connection(ep, skb, GFP_KERNEL);
1011                return;
1012        }
1013        ep->plen = (u8) plen;
1014
1015        /*
1016         * If we don't have all the pdata yet, then bail.
1017         */
1018        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1019                return;
1020
1021        /*
1022         * If we get here we have accumulated the entire mpa
 1023         * start request message including private data.
1024         */
1025        ep->mpa_attr.initiator = 0;
1026        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1027        ep->mpa_attr.recv_marker_enabled = markers_enabled;
1028        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1029        ep->mpa_attr.version = mpa_rev;
1030        pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
1031                 __func__,
1032                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1033                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
1034
1035        state_set(&ep->com, MPA_REQ_RCVD);
1036
1037        /* drive upcall */
1038        connect_request_upcall(ep);
1039        return;
1040}
1041
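/*
 * CPL_RX_DATA handler.  Streaming data is only expected during MPA
 * negotiation, so it is handed to the request/reply parsers above;
 * unexpected data is logged and left for ep_timeout() to clean up.  RX
 * credits are returned for every byte received.
 */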
1042static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1043{
1044        struct iwch_ep *ep = ctx;
1045        struct cpl_rx_data *hdr = cplhdr(skb);
1046        unsigned int dlen = ntohs(hdr->len);
1047
1048        pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);
1049
1050        skb_pull(skb, sizeof(*hdr));
1051        skb_trim(skb, dlen);
1052
1053        ep->rcv_seq += dlen;
1054        BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
1055
1056        switch (state_read(&ep->com)) {
1057        case MPA_REQ_SENT:
1058                process_mpa_reply(ep, skb);
1059                break;
1060        case MPA_REQ_WAIT:
1061                process_mpa_request(ep, skb);
1062                break;
1063        case MPA_REP_SENT:
1064                break;
1065        default:
1066                pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
1067                       __func__, ep, state_read(&ep->com), ep->hwtid);
1068
1069                /*
1070                 * The ep will timeout and inform the ULP of the failure.
1071                 * See ep_timeout().
1072                 */
1073                break;
1074        }
1075
1076        /* update RX credits */
1077        update_rx_credits(ep, dlen);
1078
1079        return CPL_RET_BUF_DONE;
1080}
1081
1082/*
1083 * Upcall from the adapter indicating data has been transmitted.
 1084 * For us it's just the single MPA request or reply.  We can now free
1085 * the skb holding the mpa message.
1086 */
1087static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1088{
1089        struct iwch_ep *ep = ctx;
1090        struct cpl_wr_ack *hdr = cplhdr(skb);
1091        unsigned int credits = ntohs(hdr->credits);
1092        unsigned long flags;
1093        int post_zb = 0;
1094
1095        pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
1096
1097        if (credits == 0) {
1098                pr_debug("%s 0 credit ack  ep %p state %u\n",
1099                         __func__, ep, state_read(&ep->com));
1100                return CPL_RET_BUF_DONE;
1101        }
1102
1103        spin_lock_irqsave(&ep->com.lock, flags);
1104        BUG_ON(credits != 1);
1105        dst_confirm(ep->dst);
1106        if (!ep->mpa_skb) {
1107                pr_debug("%s rdma_init wr_ack ep %p state %u\n",
1108                         __func__, ep, ep->com.state);
1109                if (ep->mpa_attr.initiator) {
1110                        pr_debug("%s initiator ep %p state %u\n",
1111                                 __func__, ep, ep->com.state);
1112                        if (peer2peer && ep->com.state == FPDU_MODE)
1113                                post_zb = 1;
1114                } else {
1115                        pr_debug("%s responder ep %p state %u\n",
1116                                 __func__, ep, ep->com.state);
1117                        if (ep->com.state == MPA_REQ_RCVD) {
1118                                ep->com.rpl_done = 1;
1119                                wake_up(&ep->com.waitq);
1120                        }
1121                }
1122        } else {
1123                pr_debug("%s lsm ack ep %p state %u freeing skb\n",
1124                         __func__, ep, ep->com.state);
1125                kfree_skb(ep->mpa_skb);
1126                ep->mpa_skb = NULL;
1127        }
1128        spin_unlock_irqrestore(&ep->com.lock, flags);
1129        if (post_zb)
1130                iwch_post_zb_read(ep);
1131        return CPL_RET_BUF_DONE;
1132}
1133
1134static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1135{
1136        struct iwch_ep *ep = ctx;
1137        unsigned long flags;
1138        int release = 0;
1139
1140        pr_debug("%s ep %p\n", __func__, ep);
1141        BUG_ON(!ep);
1142
1143        /*
1144         * We get 2 abort replies from the HW.  The first one must
1145         * be ignored except for scribbling that we need one more.
1146         */
1147        if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
1148                return CPL_RET_BUF_DONE;
1149        }
1150
1151        spin_lock_irqsave(&ep->com.lock, flags);
1152        switch (ep->com.state) {
1153        case ABORTING:
1154                close_complete_upcall(ep);
1155                __state_set(&ep->com, DEAD);
1156                release = 1;
1157                break;
1158        default:
1159                pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
1160                break;
1161        }
1162        spin_unlock_irqrestore(&ep->com.lock, flags);
1163
1164        if (release)
1165                release_ep_resources(ep);
1166        return CPL_RET_BUF_DONE;
1167}
1168
1169/*
1170 * Return whether a failed active open has allocated a TID
1171 */
1172static inline int act_open_has_tid(int status)
1173{
1174        return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1175               status != CPL_ERR_ARP_MISS;
1176}
1177
1178static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1179{
1180        struct iwch_ep *ep = ctx;
1181        struct cpl_act_open_rpl *rpl = cplhdr(skb);
1182
1183        pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
1184                 status2errno(rpl->status));
1185        connect_reply_upcall(ep, status2errno(rpl->status));
1186        state_set(&ep->com, DEAD);
1187        if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
1188                release_tid(ep->com.tdev, GET_TID(rpl), NULL);
1189        cxgb3_free_atid(ep->com.tdev, ep->atid);
1190        dst_release(ep->dst);
1191        l2t_release(ep->com.tdev, ep->l2t);
1192        put_ep(&ep->com);
1193        return CPL_RET_BUF_DONE;
1194}
1195
1196static int listen_start(struct iwch_listen_ep *ep)
1197{
1198        struct sk_buff *skb;
1199        struct cpl_pass_open_req *req;
1200
1201        pr_debug("%s ep %p\n", __func__, ep);
1202        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1203        if (!skb) {
1204                pr_err("t3c_listen_start failed to alloc skb!\n");
1205                return -ENOMEM;
1206        }
1207
1208        req = skb_put(skb, sizeof(*req));
1209        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1210        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
1211        req->local_port = ep->com.local_addr.sin_port;
1212        req->local_ip = ep->com.local_addr.sin_addr.s_addr;
1213        req->peer_port = 0;
1214        req->peer_ip = 0;
1215        req->peer_netmask = 0;
1216        req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
1217        req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
1218        req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
1219
1220        skb->priority = 1;
1221        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
1222}
1223
1224static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1225{
1226        struct iwch_listen_ep *ep = ctx;
1227        struct cpl_pass_open_rpl *rpl = cplhdr(skb);
1228
1229        pr_debug("%s ep %p status %d error %d\n", __func__, ep,
1230                 rpl->status, status2errno(rpl->status));
1231        ep->com.rpl_err = status2errno(rpl->status);
1232        ep->com.rpl_done = 1;
1233        wake_up(&ep->com.waitq);
1234
1235        return CPL_RET_BUF_DONE;
1236}
1237
1238static int listen_stop(struct iwch_listen_ep *ep)
1239{
1240        struct sk_buff *skb;
1241        struct cpl_close_listserv_req *req;
1242
1243        pr_debug("%s ep %p\n", __func__, ep);
1244        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1245        if (!skb) {
1246                pr_err("%s - failed to alloc skb\n", __func__);
1247                return -ENOMEM;
1248        }
1249        req = skb_put(skb, sizeof(*req));
1250        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1251        req->cpu_idx = 0;
1252        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
1253        skb->priority = 1;
1254        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
1255}
1256
1257static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
1258                             void *ctx)
1259{
1260        struct iwch_listen_ep *ep = ctx;
1261        struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
1262
1263        pr_debug("%s ep %p\n", __func__, ep);
1264        ep->com.rpl_err = status2errno(rpl->status);
1265        ep->com.rpl_done = 1;
1266        wake_up(&ep->com.waitq);
1267        return CPL_RET_BUF_DONE;
1268}
1269
1270static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
1271{
1272        struct cpl_pass_accept_rpl *rpl;
1273        unsigned int mtu_idx;
1274        u32 opt0h, opt0l, opt2;
1275        int wscale;
1276
1277        pr_debug("%s ep %p\n", __func__, ep);
1278        BUG_ON(skb_cloned(skb));
1279        skb_trim(skb, sizeof(*rpl));
1280        skb_get(skb);
1281        mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
1282        wscale = compute_wscale(rcv_win);
1283        opt0h = V_NAGLE(0) |
1284            V_NO_CONG(nocong) |
1285            V_KEEP_ALIVE(1) |
1286            F_TCAM_BYPASS |
1287            V_WND_SCALE(wscale) |
1288            V_MSS_IDX(mtu_idx) |
1289            V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
1290        opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
1291        opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
1292               V_CONG_CONTROL_FLAVOR(cong_flavor);
1293
1294        rpl = cplhdr(skb);
1295        rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1296        OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
1297        rpl->peer_ip = peer_ip;
1298        rpl->opt0h = htonl(opt0h);
1299        rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
1300        rpl->opt2 = htonl(opt2);
1301        rpl->rsvd = rpl->opt2;  /* workaround for HW bug */
1302        skb->priority = CPL_PRIORITY_SETUP;
1303        iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
1304
1305        return;
1306}
1307
1308static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
1309                      struct sk_buff *skb)
1310{
1311        pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
1312                 peer_ip);
1313        BUG_ON(skb_cloned(skb));
1314        skb_trim(skb, sizeof(struct cpl_tid_release));
1315        skb_get(skb);
1316
1317        if (tdev->type != T3A)
1318                release_tid(tdev, hwtid, skb);
1319        else {
1320                struct cpl_pass_accept_rpl *rpl;
1321
1322                rpl = cplhdr(skb);
1323                skb->priority = CPL_PRIORITY_SETUP;
1324                rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1325                OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
1326                                                      hwtid));
1327                rpl->peer_ip = peer_ip;
1328                rpl->opt0h = htonl(F_TCAM_BYPASS);
1329                rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
1330                rpl->opt2 = 0;
1331                rpl->rsvd = rpl->opt2;
1332                iwch_cxgb3_ofld_send(tdev, skb);
1333        }
1334}
1335
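/*
 * Incoming connection request on a listening endpoint: look up the ingress
 * netdev, route and L2T entry, allocate a child endpoint linked to the
 * parent listener, insert the hardware TID and send the accept reply.  Any
 * failure along the way rejects the request instead.
 */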
1336static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1337{
1338        struct iwch_ep *child_ep, *parent_ep = ctx;
1339        struct cpl_pass_accept_req *req = cplhdr(skb);
1340        unsigned int hwtid = GET_TID(req);
1341        struct dst_entry *dst;
1342        struct l2t_entry *l2t;
1343        struct rtable *rt;
1344        struct iff_mac tim;
1345
1346        pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
1347
1348        if (state_read(&parent_ep->com) != LISTEN) {
1349                pr_err("%s - listening ep not in LISTEN\n", __func__);
1350                goto reject;
1351        }
1352
1353        /*
1354         * Find the netdev for this connection request.
1355         */
1356        tim.mac_addr = req->dst_mac;
1357        tim.vlan_tag = ntohs(req->vlan_tag);
1358        if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
1359                pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
1360                goto reject;
1361        }
1362
1363        /* Find output route */
1364        rt = find_route(tdev,
1365                        req->local_ip,
1366                        req->peer_ip,
1367                        req->local_port,
1368                        req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
1369        if (!rt) {
1370                pr_err("%s - failed to find dst entry!\n", __func__);
1371                goto reject;
1372        }
1373        dst = &rt->dst;
1374        l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
1375        if (!l2t) {
1376                pr_err("%s - failed to allocate l2t entry!\n", __func__);
1377                dst_release(dst);
1378                goto reject;
1379        }
1380        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
1381        if (!child_ep) {
1382                pr_err("%s - failed to allocate ep entry!\n", __func__);
1383                l2t_release(tdev, l2t);
1384                dst_release(dst);
1385                goto reject;
1386        }
1387        state_set(&child_ep->com, CONNECTING);
1388        child_ep->com.tdev = tdev;
1389        child_ep->com.cm_id = NULL;
1390        child_ep->com.local_addr.sin_family = AF_INET;
1391        child_ep->com.local_addr.sin_port = req->local_port;
1392        child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
1393        child_ep->com.remote_addr.sin_family = AF_INET;
1394        child_ep->com.remote_addr.sin_port = req->peer_port;
1395        child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
1396        get_ep(&parent_ep->com);
1397        child_ep->parent_ep = parent_ep;
1398        child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
1399        child_ep->l2t = l2t;
1400        child_ep->dst = dst;
1401        child_ep->hwtid = hwtid;
1402        init_timer(&child_ep->timer);
1403        cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
1404        accept_cr(child_ep, req->peer_ip, skb);
1405        goto out;
1406reject:
1407        reject_cr(tdev, hwtid, req->peer_ip, skb);
1408out:
1409        return CPL_RET_BUF_DONE;
1410}
1411
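/*
 * Passive connection is fully established: record the initial sequence
 * numbers, set the effective MSS, arm the MPA timer and wait for the
 * peer's MPA request.
 */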
1412static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1413{
1414        struct iwch_ep *ep = ctx;
1415        struct cpl_pass_establish *req = cplhdr(skb);
1416
1417        pr_debug("%s ep %p\n", __func__, ep);
1418        ep->snd_seq = ntohl(req->snd_isn);
1419        ep->rcv_seq = ntohl(req->rcv_isn);
1420
1421        set_emss(ep, ntohs(req->tcp_opt));
1422
1423        dst_confirm(ep->dst);
1424        state_set(&ep->com, MPA_REQ_WAIT);
1425        start_ep_timer(ep);
1426
1427        return CPL_RET_BUF_DONE;
1428}
1429
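/*
 * The peer has closed its half of the connection.  Drive the endpoint
 * state machine towards CLOSING/MORIBUND/DEAD, moving the QP to CLOSING or
 * IDLE and delivering the matching upcalls; a local half-close is only
 * initiated if the connection is not already being torn down.
 */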
1430static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1431{
1432        struct iwch_ep *ep = ctx;
1433        struct iwch_qp_attributes attrs;
1434        unsigned long flags;
1435        int disconnect = 1;
1436        int release = 0;
1437
1438        pr_debug("%s ep %p\n", __func__, ep);
1439        dst_confirm(ep->dst);
1440
1441        spin_lock_irqsave(&ep->com.lock, flags);
1442        switch (ep->com.state) {
1443        case MPA_REQ_WAIT:
1444                __state_set(&ep->com, CLOSING);
1445                break;
1446        case MPA_REQ_SENT:
1447                __state_set(&ep->com, CLOSING);
1448                connect_reply_upcall(ep, -ECONNRESET);
1449                break;
1450        case MPA_REQ_RCVD:
1451
1452                /*
1453                 * We're gonna mark this puppy DEAD, but keep
1454                 * the reference on it until the ULP accepts or
1455                 * rejects the CR. Also wake up anyone waiting
1456                 * in rdma connection migration (see iwch_accept_cr()).
1457                 */
1458                __state_set(&ep->com, CLOSING);
1459                ep->com.rpl_done = 1;
1460                ep->com.rpl_err = -ECONNRESET;
1461                pr_debug("waking up ep %p\n", ep);
1462                wake_up(&ep->com.waitq);
1463                break;
1464        case MPA_REP_SENT:
1465                __state_set(&ep->com, CLOSING);
1466                ep->com.rpl_done = 1;
1467                ep->com.rpl_err = -ECONNRESET;
1468                pr_debug("waking up ep %p\n", ep);
1469                wake_up(&ep->com.waitq);
1470                break;
1471        case FPDU_MODE:
1472                start_ep_timer(ep);
1473                __state_set(&ep->com, CLOSING);
1474                attrs.next_state = IWCH_QP_STATE_CLOSING;
1475                iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1476                               IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1477                peer_close_upcall(ep);
1478                break;
1479        case ABORTING:
1480                disconnect = 0;
1481                break;
1482        case CLOSING:
1483                __state_set(&ep->com, MORIBUND);
1484                disconnect = 0;
1485                break;
1486        case MORIBUND:
1487                stop_ep_timer(ep);
1488                if (ep->com.cm_id && ep->com.qp) {
1489                        attrs.next_state = IWCH_QP_STATE_IDLE;
1490                        iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1491                                       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1492                }
1493                close_complete_upcall(ep);
1494                __state_set(&ep->com, DEAD);
1495                release = 1;
1496                disconnect = 0;
1497                break;
1498        case DEAD:
1499                disconnect = 0;
1500                break;
1501        default:
1502                BUG_ON(1);
1503        }
1504        spin_unlock_irqrestore(&ep->com.lock, flags);
1505        if (disconnect)
1506                iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1507        if (release)
1508                release_ep_resources(ep);
1509        return CPL_RET_BUF_DONE;
1510}
1511
1512/*
1513 * Returns whether an ABORT_REQ_RSS message is a negative advice.
1514 */
1515static int is_neg_adv_abort(unsigned int status)
1516{
1517        return status == CPL_ERR_RTX_NEG_ADVICE ||
1518               status == CPL_ERR_PERSIST_NEG_ADVICE;
1519}
1520
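/*
 * The peer (or the hardware on its behalf) aborted the connection.
 * Negative-advice aborts are ignored, and of the two abort messages the
 * hardware delivers only the second is processed: depending on the current
 * state it fails a pending connect, wakes a waiter, or moves the QP to
 * ERROR and delivers the abort upcall, before replying to the hardware.
 */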
1521static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1522{
1523        struct cpl_abort_req_rss *req = cplhdr(skb);
1524        struct iwch_ep *ep = ctx;
1525        struct cpl_abort_rpl *rpl;
1526        struct sk_buff *rpl_skb;
1527        struct iwch_qp_attributes attrs;
1528        int ret;
1529        int release = 0;
1530        unsigned long flags;
1531
1532        if (is_neg_adv_abort(req->status)) {
1533                pr_debug("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
1534                         ep->hwtid);
1535                t3_l2t_send_event(ep->com.tdev, ep->l2t);
1536                return CPL_RET_BUF_DONE;
1537        }
1538
1539        /*
1540         * The hardware delivers two peer aborts.  The first one must be
1541         * ignored, except for noting that a second one is expected.
1542         */
1543        if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
1544                return CPL_RET_BUF_DONE;
1545        }
1546
1547        spin_lock_irqsave(&ep->com.lock, flags);
1548        pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
1549        switch (ep->com.state) {
1550        case CONNECTING:
1551                break;
1552        case MPA_REQ_WAIT:
1553                stop_ep_timer(ep);
1554                break;
1555        case MPA_REQ_SENT:
1556                stop_ep_timer(ep);
1557                connect_reply_upcall(ep, -ECONNRESET);
1558                break;
1559        case MPA_REP_SENT:
1560                ep->com.rpl_done = 1;
1561                ep->com.rpl_err = -ECONNRESET;
1562                pr_debug("waking up ep %p\n", ep);
1563                wake_up(&ep->com.waitq);
1564                break;
1565        case MPA_REQ_RCVD:
1566
1567                /*
1568                 * The endpoint will be marked DEAD, but keep the
1569                 * reference on it until the ULP accepts or rejects
1570                 * the connection request. Also wake up anyone waiting
1571                 * in rdma connection migration (see iwch_accept_cr()).
1572                 */
1573                ep->com.rpl_done = 1;
1574                ep->com.rpl_err = -ECONNRESET;
1575                pr_debug("waking up ep %p\n", ep);
1576                wake_up(&ep->com.waitq);
1577                break;
1578        case MORIBUND:
1579        case CLOSING:
1580                stop_ep_timer(ep);
1581                /*FALLTHROUGH*/
1582        case FPDU_MODE:
1583                if (ep->com.cm_id && ep->com.qp) {
1584                        attrs.next_state = IWCH_QP_STATE_ERROR;
1585                        ret = iwch_modify_qp(ep->com.qp->rhp,
1586                                     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1587                                     &attrs, 1);
1588                        if (ret)
1589                                pr_err("%s - moving qp to error failed!\n", __func__);
1590                }
1591                peer_abort_upcall(ep);
1592                break;
1593        case ABORTING:
1594                break;
1595        case DEAD:
1596                pr_debug("%s PEER_ABORT in DEAD state\n", __func__);
1597                spin_unlock_irqrestore(&ep->com.lock, flags);
1598                return CPL_RET_BUF_DONE;
1599        default:
1600                BUG();
1601                break;
1602        }
1603        dst_confirm(ep->dst);
1604        if (ep->com.state != ABORTING) {
1605                __state_set(&ep->com, DEAD);
1606                release = 1;
1607        }
1608        spin_unlock_irqrestore(&ep->com.lock, flags);
1609
1610        rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
1611        if (!rpl_skb) {
1612                pr_err("%s - cannot allocate skb!\n", __func__);
1613                release = 1;
1614                goto out;
1615        }
1616        rpl_skb->priority = CPL_PRIORITY_DATA;
1617        rpl = skb_put(rpl_skb, sizeof(*rpl));
1618        rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
1619        rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
1620        OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
1621        rpl->cmd = CPL_ABORT_NO_RST;
1622        iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
1623out:
1624        if (release)
1625                release_ep_resources(ep);
1626        return CPL_RET_BUF_DONE;
1627}
1628
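/*
 * Handle the CPL_CLOSE_CON_RPL that confirms our half-close.  CLOSING
 * advances to MORIBUND; from MORIBUND the QP is idled, the close is
 * completed to the ULP and the endpoint resources are released.
 */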
1629static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1630{
1631        struct iwch_ep *ep = ctx;
1632        struct iwch_qp_attributes attrs;
1633        unsigned long flags;
1634        int release = 0;
1635
1636        pr_debug("%s ep %p\n", __func__, ep);
1637        BUG_ON(!ep);
1638
1639        /* The cm_id may be null if we failed to connect */
1640        spin_lock_irqsave(&ep->com.lock, flags);
1641        switch (ep->com.state) {
1642        case CLOSING:
1643                __state_set(&ep->com, MORIBUND);
1644                break;
1645        case MORIBUND:
1646                stop_ep_timer(ep);
1647                if (ep->com.cm_id && ep->com.qp) {
1648                        attrs.next_state = IWCH_QP_STATE_IDLE;
1649                        iwch_modify_qp(ep->com.qp->rhp,
1650                                             ep->com.qp,
1651                                             IWCH_QP_ATTR_NEXT_STATE,
1652                                             &attrs, 1);
1653                }
1654                close_complete_upcall(ep);
1655                __state_set(&ep->com, DEAD);
1656                release = 1;
1657                break;
1658        case ABORTING:
1659        case DEAD:
1660                break;
1661        default:
1662                BUG();
1663                break;
1664        }
1665        spin_unlock_irqrestore(&ep->com.lock, flags);
1666        if (release)
1667                release_ep_resources(ep);
1668        return CPL_RET_BUF_DONE;
1669}
1670
1671/*
1672 * T3A does 3 things when a TERM is received:
1673 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
1674 * 2) generate an async event on the QP with the TERMINATE opcode
1675 * 3) post a TERMINATE opcode cqe into the associated CQ.
1676 *
1677 * For (1), we save the message in the qp for the consumer to retrieve later.
1678 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
1679 * For (3), we toss the CQE in cxio_poll_cq().
1680 *
1681 * terminate() handles case (1)...
1682 */
1683static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1684{
1685        struct iwch_ep *ep = ctx;
1686
1687        if (state_read(&ep->com) != FPDU_MODE)
1688                return CPL_RET_BUF_DONE;
1689
1690        pr_debug("%s ep %p\n", __func__, ep);
1691        skb_pull(skb, sizeof(struct cpl_rdma_terminate));
1692        pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
1693        skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
1694                                  skb->len);
1695        ep->com.qp->attr.terminate_msg_len = skb->len;
1696        ep->com.qp->attr.is_terminate_local = 0;
1697        return CPL_RET_BUF_DONE;
1698}
1699
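/*
 * Handle CPL_RDMA_EC_STATUS.  A non-zero status means the graceful
 * close failed, so move the QP to ERROR and abort the connection.
 */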
1700static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1701{
1702        struct cpl_rdma_ec_status *rep = cplhdr(skb);
1703        struct iwch_ep *ep = ctx;
1704
1705        pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
1706                 rep->status);
1707        if (rep->status) {
1708                struct iwch_qp_attributes attrs;
1709
1710                pr_err("%s BAD CLOSE - Aborting tid %u\n",
1711                       __func__, ep->hwtid);
1712                stop_ep_timer(ep);
1713                attrs.next_state = IWCH_QP_STATE_ERROR;
1714                iwch_modify_qp(ep->com.qp->rhp,
1715                               ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1716                               &attrs, 1);
1717                abort_connection(ep, NULL, GFP_KERNEL);
1718        }
1719        return CPL_RET_BUF_DONE;
1720}
1721
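/*
 * Endpoint timer expired.  Connections stuck in MPA negotiation or in
 * the close sequence are moved to ABORTING and then aborted, before the
 * endpoint reference held for the timer is dropped.
 */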
1722static void ep_timeout(unsigned long arg)
1723{
1724        struct iwch_ep *ep = (struct iwch_ep *)arg;
1725        struct iwch_qp_attributes attrs;
1726        unsigned long flags;
1727        int abort = 1;
1728
1729        spin_lock_irqsave(&ep->com.lock, flags);
1730        pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
1731                 ep->com.state);
1732        switch (ep->com.state) {
1733        case MPA_REQ_SENT:
1734                __state_set(&ep->com, ABORTING);
1735                connect_reply_upcall(ep, -ETIMEDOUT);
1736                break;
1737        case MPA_REQ_WAIT:
1738                __state_set(&ep->com, ABORTING);
1739                break;
1740        case CLOSING:
1741        case MORIBUND:
1742                if (ep->com.cm_id && ep->com.qp) {
1743                        attrs.next_state = IWCH_QP_STATE_ERROR;
1744                        iwch_modify_qp(ep->com.qp->rhp,
1745                                     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1746                                     &attrs, 1);
1747                }
1748                __state_set(&ep->com, ABORTING);
1749                break;
1750        default:
1751                WARN(1, "%s unexpected state ep %p state %u\n",
1752                        __func__, ep, ep->com.state);
1753                abort = 0;
1754        }
1755        spin_unlock_irqrestore(&ep->com.lock, flags);
1756        if (abort)
1757                abort_connection(ep, NULL, GFP_ATOMIC);
1758        put_ep(&ep->com);
1759}
1760
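/*
 * iw_cm reject handler: called by the ULP to reject an inbound
 * connection request.  For mpa_rev 0 the connection is simply aborted;
 * otherwise an MPA reject (with any private data) is sent and the
 * connection is closed.
 */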
1761int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
1762{
1763        int err;
1764        struct iwch_ep *ep = to_ep(cm_id);
1765        pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1766
1767        if (state_read(&ep->com) == DEAD) {
1768                put_ep(&ep->com);
1769                return -ECONNRESET;
1770        }
1771        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1772        if (mpa_rev == 0)
1773                abort_connection(ep, NULL, GFP_KERNEL);
1774        else {
1775                err = send_mpa_reject(ep, pdata, pdata_len);
1776                err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1777        }
1778        put_ep(&ep->com);
1779        return 0;
1780}
1781
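/*
 * iw_cm accept handler: binds the ULP's QP to the endpoint, moves the
 * QP to RTS (waiting for the INIT_WR ack if RQEs are already posted),
 * sends the MPA reply and transitions the endpoint to FPDU_MODE.
 */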
1782int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1783{
1784        int err;
1785        struct iwch_qp_attributes attrs;
1786        enum iwch_qp_attr_mask mask;
1787        struct iwch_ep *ep = to_ep(cm_id);
1788        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1789        struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
1790
1791        pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1792        if (state_read(&ep->com) == DEAD) {
1793                err = -ECONNRESET;
1794                goto err;
1795        }
1796
1797        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1798        BUG_ON(!qp);
1799
1800        if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
1801            (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
1802                abort_connection(ep, NULL, GFP_KERNEL);
1803                err = -EINVAL;
1804                goto err;
1805        }
1806
1807        cm_id->add_ref(cm_id);
1808        ep->com.cm_id = cm_id;
1809        ep->com.qp = qp;
1810
1811        ep->ird = conn_param->ird;
1812        ep->ord = conn_param->ord;
1813
1814        if (peer2peer && ep->ird == 0)
1815                ep->ird = 1;
1816
1817        pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
1818
1819        /* bind QP to EP and move to RTS */
1820        attrs.mpa_attr = ep->mpa_attr;
1821        attrs.max_ird = ep->ird;
1822        attrs.max_ord = ep->ord;
1823        attrs.llp_stream_handle = ep;
1824        attrs.next_state = IWCH_QP_STATE_RTS;
1825
1826        /* bind QP and TID with INIT_WR */
1827        mask = IWCH_QP_ATTR_NEXT_STATE |
1828                             IWCH_QP_ATTR_LLP_STREAM_HANDLE |
1829                             IWCH_QP_ATTR_MPA_ATTR |
1830                             IWCH_QP_ATTR_MAX_IRD |
1831                             IWCH_QP_ATTR_MAX_ORD;
1832
1833        err = iwch_modify_qp(ep->com.qp->rhp,
1834                             ep->com.qp, mask, &attrs, 1);
1835        if (err)
1836                goto err1;
1837
1838        /* if needed, wait for wr_ack */
1839        if (iwch_rqes_posted(qp)) {
1840                wait_event(ep->com.waitq, ep->com.rpl_done);
1841                err = ep->com.rpl_err;
1842                if (err)
1843                        goto err1;
1844        }
1845
1846        err = send_mpa_reply(ep, conn_param->private_data,
1847                             conn_param->private_data_len);
1848        if (err)
1849                goto err1;
1850
1851
1852        state_set(&ep->com, FPDU_MODE);
1853        established_upcall(ep);
1854        put_ep(&ep->com);
1855        return 0;
1856err1:
1857        ep->com.cm_id = NULL;
1858        ep->com.qp = NULL;
1859        cm_id->rem_ref(cm_id);
1860err:
1861        put_ep(&ep->com);
1862        return err;
1863}
1864
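/*
 * Returns non-zero if the remote address is one of this host's own
 * addresses; such loopback destinations are not supported.
 */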
1865static int is_loopback_dst(struct iw_cm_id *cm_id)
1866{
1867        struct net_device *dev;
1868        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
1869
1870        dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
1871        if (!dev)
1872                return 0;
1873        dev_put(dev);
1874        return 1;
1875}
1876
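/*
 * Active connection setup: allocate an endpoint and an active TID, find
 * a route and an L2T entry to the peer, and send the connect request to
 * the adapter.  Only IPv4, non-loopback destinations are supported.
 */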
1877int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1878{
1879        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1880        struct iwch_ep *ep;
1881        struct rtable *rt;
1882        int err = 0;
1883        struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
1884        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
1885
1886        if (cm_id->m_remote_addr.ss_family != PF_INET) {
1887                err = -ENOSYS;
1888                goto out;
1889        }
1890
1891        if (is_loopback_dst(cm_id)) {
1892                err = -ENOSYS;
1893                goto out;
1894        }
1895
1896        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1897        if (!ep) {
1898                pr_err("%s - cannot alloc ep\n", __func__);
1899                err = -ENOMEM;
1900                goto out;
1901        }
1902        init_timer(&ep->timer);
1903        ep->plen = conn_param->private_data_len;
1904        if (ep->plen)
1905                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
1906                       conn_param->private_data, ep->plen);
1907        ep->ird = conn_param->ird;
1908        ep->ord = conn_param->ord;
1909
1910        if (peer2peer && ep->ord == 0)
1911                ep->ord = 1;
1912
1913        ep->com.tdev = h->rdev.t3cdev_p;
1914
1915        cm_id->add_ref(cm_id);
1916        ep->com.cm_id = cm_id;
1917        ep->com.qp = get_qhp(h, conn_param->qpn);
1918        BUG_ON(!ep->com.qp);
1919        pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
1920                 ep->com.qp, cm_id);
1921
1922        /*
1923         * Allocate an active TID to initiate a TCP connection.
1924         */
1925        ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
1926        if (ep->atid == -1) {
1927                pr_err("%s - cannot alloc atid\n", __func__);
1928                err = -ENOMEM;
1929                goto fail2;
1930        }
1931
1932        /* find a route */
1933        rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
1934                        raddr->sin_addr.s_addr, laddr->sin_port,
1935                        raddr->sin_port, IPTOS_LOWDELAY);
1936        if (!rt) {
1937                pr_err("%s - cannot find route\n", __func__);
1938                err = -EHOSTUNREACH;
1939                goto fail3;
1940        }
1941        ep->dst = &rt->dst;
1942        ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
1943                             &raddr->sin_addr.s_addr);
1944        if (!ep->l2t) {
1945                pr_err("%s - cannot alloc l2e\n", __func__);
1946                err = -ENOMEM;
1947                goto fail4;
1948        }
1949
1950        state_set(&ep->com, CONNECTING);
1951        ep->tos = IPTOS_LOWDELAY;
1952        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
1953               sizeof(ep->com.local_addr));
1954        memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
1955               sizeof(ep->com.remote_addr));
1956
1957        /* send connect request to rnic */
1958        err = send_connect(ep);
1959        if (!err)
1960                goto out;
1961
1962        l2t_release(h->rdev.t3cdev_p, ep->l2t);
1963fail4:
1964        dst_release(ep->dst);
1965fail3:
1966        cxgb3_free_atid(ep->com.tdev, ep->atid);
1967fail2:
1968        cm_id->rem_ref(cm_id);
1969        put_ep(&ep->com);
1970out:
1971        return err;
1972}
1973
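/*
 * Start listening for inbound connections on behalf of the ULP:
 * allocate a listening endpoint and a server TID, then wait for the
 * adapter's PASS_OPEN_RPL before returning.
 */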
1974int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
1975{
1976        int err = 0;
1977        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1978        struct iwch_listen_ep *ep;
1979
1980
1981        might_sleep();
1982
1983        if (cm_id->m_local_addr.ss_family != PF_INET) {
1984                err = -ENOSYS;
1985                goto fail1;
1986        }
1987
1988        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1989        if (!ep) {
1990                pr_err("%s - cannot alloc ep\n", __func__);
1991                err = -ENOMEM;
1992                goto fail1;
1993        }
1994        pr_debug("%s ep %p\n", __func__, ep);
1995        ep->com.tdev = h->rdev.t3cdev_p;
1996        cm_id->add_ref(cm_id);
1997        ep->com.cm_id = cm_id;
1998        ep->backlog = backlog;
1999        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
2000               sizeof(ep->com.local_addr));
2001
2002        /*
2003         * Allocate a server TID.
2004         */
2005        ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
2006        if (ep->stid == -1) {
2007                pr_err("%s - cannot alloc stid\n", __func__);
2008                err = -ENOMEM;
2009                goto fail2;
2010        }
2011
2012        state_set(&ep->com, LISTEN);
2013        err = listen_start(ep);
2014        if (err)
2015                goto fail3;
2016
2017        /* wait for pass_open_rpl */
2018        wait_event(ep->com.waitq, ep->com.rpl_done);
2019        err = ep->com.rpl_err;
2020        if (!err) {
2021                cm_id->provider_data = ep;
2022                goto out;
2023        }
2024fail3:
2025        cxgb3_free_stid(ep->com.tdev, ep->stid);
2026fail2:
2027        cm_id->rem_ref(cm_id);
2028        put_ep(&ep->com);
2029fail1:
2030out:
2031        return err;
2032}
2033
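/*
 * Tear down a listening endpoint: stop the listener, wait for the
 * adapter's reply and free the server TID.
 */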
2034int iwch_destroy_listen(struct iw_cm_id *cm_id)
2035{
2036        int err;
2037        struct iwch_listen_ep *ep = to_listen_ep(cm_id);
2038
2039        pr_debug("%s ep %p\n", __func__, ep);
2040
2041        might_sleep();
2042        state_set(&ep->com, DEAD);
2043        ep->com.rpl_done = 0;
2044        ep->com.rpl_err = 0;
2045        err = listen_stop(ep);
2046        if (err)
2047                goto done;
2048        wait_event(ep->com.waitq, ep->com.rpl_done);
2049        cxgb3_free_stid(ep->com.tdev, ep->stid);
2050done:
2051        err = ep->com.rpl_err;
2052        cm_id->rem_ref(cm_id);
2053        put_ep(&ep->com);
2054        return err;
2055}
2056
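/*
 * Close or abort a connection on behalf of the caller.  Depending on
 * the current state this sends either an abort or a TCP half-close; a
 * fatal adapter error or a send failure releases the endpoint
 * resources immediately.
 */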
2057int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
2058{
2059        int ret = 0;
2060        unsigned long flags;
2061        int close = 0;
2062        int fatal = 0;
2063        struct t3cdev *tdev;
2064        struct cxio_rdev *rdev;
2065
2066        spin_lock_irqsave(&ep->com.lock, flags);
2067
2068        pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
2069                 states[ep->com.state], abrupt);
2070
2071        tdev = (struct t3cdev *)ep->com.tdev;
2072        rdev = (struct cxio_rdev *)tdev->ulp;
2073        if (cxio_fatal_error(rdev)) {
2074                fatal = 1;
2075                close_complete_upcall(ep);
2076                ep->com.state = DEAD;
2077        }
2078        switch (ep->com.state) {
2079        case MPA_REQ_WAIT:
2080        case MPA_REQ_SENT:
2081        case MPA_REQ_RCVD:
2082        case MPA_REP_SENT:
2083        case FPDU_MODE:
2084                close = 1;
2085                if (abrupt)
2086                        ep->com.state = ABORTING;
2087                else {
2088                        ep->com.state = CLOSING;
2089                        start_ep_timer(ep);
2090                }
2091                set_bit(CLOSE_SENT, &ep->com.flags);
2092                break;
2093        case CLOSING:
2094                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2095                        close = 1;
2096                        if (abrupt) {
2097                                stop_ep_timer(ep);
2098                                ep->com.state = ABORTING;
2099                        } else
2100                                ep->com.state = MORIBUND;
2101                }
2102                break;
2103        case MORIBUND:
2104        case ABORTING:
2105        case DEAD:
2106                pr_debug("%s ignoring disconnect ep %p state %u\n",
2107                         __func__, ep, ep->com.state);
2108                break;
2109        default:
2110                BUG();
2111                break;
2112        }
2113
2114        spin_unlock_irqrestore(&ep->com.lock, flags);
2115        if (close) {
2116                if (abrupt)
2117                        ret = send_abort(ep, NULL, gfp);
2118                else
2119                        ret = send_halfclose(ep, gfp);
2120                if (ret)
2121                        fatal = 1;
2122        }
2123        if (fatal)
2124                release_ep_resources(ep);
2125        return ret;
2126}
2127
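/*
 * Called on a routing update: if the endpoint is using the old dst,
 * switch it to the new dst and L2T entry.  Returns 1 if the endpoint
 * was redirected, 0 otherwise.
 */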
2128int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2129                     struct l2t_entry *l2t)
2130{
2131        struct iwch_ep *ep = ctx;
2132
2133        if (ep->dst != old)
2134                return 0;
2135
2136        pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2137                 l2t);
2138        dst_hold(new);
2139        l2t_release(ep->com.tdev, ep->l2t);
2140        ep->l2t = l2t;
2141        dst_release(old);
2142        ep->dst = new;
2143        return 1;
2144}
2145
2146/*
2147 * All the CM events are handled on a work queue to have a safe context.
2148 * These are the real handlers that are called from the work queue.
2149 */
2150static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
2151        [CPL_ACT_ESTABLISH]     = act_establish,
2152        [CPL_ACT_OPEN_RPL]      = act_open_rpl,
2153        [CPL_RX_DATA]           = rx_data,
2154        [CPL_TX_DMA_ACK]        = tx_ack,
2155        [CPL_ABORT_RPL_RSS]     = abort_rpl,
2156        [CPL_ABORT_RPL]         = abort_rpl,
2157        [CPL_PASS_OPEN_RPL]     = pass_open_rpl,
2158        [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
2159        [CPL_PASS_ACCEPT_REQ]   = pass_accept_req,
2160        [CPL_PASS_ESTABLISH]    = pass_establish,
2161        [CPL_PEER_CLOSE]        = peer_close,
2162        [CPL_ABORT_REQ_RSS]     = peer_abort,
2163        [CPL_CLOSE_CON_RPL]     = close_con_rpl,
2164        [CPL_RDMA_TERMINATE]    = terminate,
2165        [CPL_RDMA_EC_STATUS]    = ec_status,
2166};
2167
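/*
 * Work queue handler: drains the rx queue and dispatches each CPL
 * message to its handler using the endpoint and tdev that sched()
 * stashed in skb->cb, then drops the endpoint reference taken there.
 */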
2168static void process_work(struct work_struct *work)
2169{
2170        struct sk_buff *skb = NULL;
2171        void *ep;
2172        struct t3cdev *tdev;
2173        int ret;
2174
2175        while ((skb = skb_dequeue(&rxq))) {
2176                ep = *((void **) (skb->cb));
2177                tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
2178                ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
2179                if (ret & CPL_RET_BUF_DONE)
2180                        kfree_skb(skb);
2181
2182                /*
2183                 * ep was referenced in sched(), and is freed here.
2184                 */
2185                put_ep((struct iwch_ep_common *)ep);
2186        }
2187}
2188
2189static DECLARE_WORK(skb_work, process_work);
2190
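/*
 * Defer CPL processing to the work queue: take a reference on the
 * endpoint, stash the context and tdev pointers in skb->cb, queue the
 * skb and kick the worker.
 */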
2191static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2192{
2193        struct iwch_ep_common *epc = ctx;
2194
2195        get_ep(epc);
2196
2197        /*
2198         * Save ctx and tdev in the skb->cb area.
2199         */
2200        *((void **) skb->cb) = ctx;
2201        *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
2202
2203        /*
2204         * Queue the skb and schedule the worker thread.
2205         */
2206        skb_queue_tail(&rxq, skb);
2207        queue_work(workq, &skb_work);
2208        return 0;
2209}
2210
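/*
 * SET_TCB replies are handled inline rather than deferred to the work
 * queue; anything other than CPL_ERR_NONE is unexpected and logged.
 */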
2211static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2212{
2213        struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
2214
2215        if (rpl->status != CPL_ERR_NONE) {
2216                pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
2217                       rpl->status, GET_TID(rpl));
2218        }
2219        return CPL_RET_BUF_DONE;
2220}
2221
2222/*
2223 * All upcalls from the T3 Core go to sched() to schedule the
2224 * processing on a work queue.
2225 */
2226cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
2227        [CPL_ACT_ESTABLISH]     = sched,
2228        [CPL_ACT_OPEN_RPL]      = sched,
2229        [CPL_RX_DATA]           = sched,
2230        [CPL_TX_DMA_ACK]        = sched,
2231        [CPL_ABORT_RPL_RSS]     = sched,
2232        [CPL_ABORT_RPL]         = sched,
2233        [CPL_PASS_OPEN_RPL]     = sched,
2234        [CPL_CLOSE_LISTSRV_RPL] = sched,
2235        [CPL_PASS_ACCEPT_REQ]   = sched,
2236        [CPL_PASS_ESTABLISH]    = sched,
2237        [CPL_PEER_CLOSE]        = sched,
2238        [CPL_CLOSE_CON_RPL]     = sched,
2239        [CPL_ABORT_REQ_RSS]     = sched,
2240        [CPL_RDMA_TERMINATE]    = sched,
2241        [CPL_RDMA_EC_STATUS]    = sched,
2242        [CPL_SET_TCB_RPL]       = set_tcb_rpl,
2243};
2244
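/*
 * Module initialization for the CM: set up the rx queue and an ordered,
 * memory-reclaim-capable workqueue for deferred CPL processing.
 */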
2245int __init iwch_cm_init(void)
2246{
2247        skb_queue_head_init(&rxq);
2248
2249        workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM);
2250        if (!workq)
2251                return -ENOMEM;
2252
2253        return 0;
2254}
2255
2256void __exit iwch_cm_term(void)
2257{
2258        flush_workqueue(workq);
2259        destroy_workqueue(workq);
2260}
2261