linux/drivers/infiniband/hw/cxgb3/iwch_cm.c
   1/*
   2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/list.h>
  34#include <linux/workqueue.h>
  35#include <linux/skbuff.h>
  36#include <linux/timer.h>
  37#include <linux/notifier.h>
  38#include <linux/inetdevice.h>
  39
  40#include <net/neighbour.h>
  41#include <net/netevent.h>
  42#include <net/route.h>
  43
  44#include "tcb.h"
  45#include "cxgb3_offload.h"
  46#include "iwch.h"
  47#include "iwch_provider.h"
  48#include "iwch_cm.h"
  49
  50static char *states[] = {
  51        "idle",
  52        "listen",
  53        "connecting",
  54        "mpa_wait_req",
  55        "mpa_req_sent",
  56        "mpa_req_rcvd",
  57        "mpa_rep_sent",
  58        "fpdu_mode",
  59        "aborting",
  60        "closing",
  61        "moribund",
  62        "dead",
  63        NULL,
  64};
  65
  66int peer2peer = 0;
  67module_param(peer2peer, int, 0644);
  68MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
  69
  70static int ep_timeout_secs = 60;
  71module_param(ep_timeout_secs, int, 0644);
  72MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
  73                                   "in seconds (default=60)");
  74
  75static int mpa_rev = 1;
  76module_param(mpa_rev, int, 0644);
  77MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
  78                 "1 is spec compliant. (default=1)");
  79
  80static int markers_enabled = 0;
  81module_param(markers_enabled, int, 0644);
  82MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
  83
  84static int crc_enabled = 1;
  85module_param(crc_enabled, int, 0644);
  86MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
  87
  88static int rcv_win = 256 * 1024;
  89module_param(rcv_win, int, 0644);
   90MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
  91
  92static int snd_win = 32 * 1024;
  93module_param(snd_win, int, 0644);
  94MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
  95
  96static unsigned int nocong = 0;
  97module_param(nocong, uint, 0644);
  98MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
  99
 100static unsigned int cong_flavor = 1;
 101module_param(cong_flavor, uint, 0644);
 102MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
 103
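/*
 * Deferred CPL processing: messages are queued on rxq with the endpoint
 * and t3cdev pointers stashed in skb->cb, and process_work() later runs
 * the handler registered in work_handlers[] for each CPL opcode.
 */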
 104static void process_work(struct work_struct *work);
 105static struct workqueue_struct *workq;
 106static DECLARE_WORK(skb_work, process_work);
 107
 108static struct sk_buff_head rxq;
 109static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS];
 110
 111static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
 112static void ep_timeout(unsigned long arg);
 113static void connect_reply_upcall(struct iwch_ep *ep, int status);
 114
 115static void start_ep_timer(struct iwch_ep *ep)
 116{
 117        PDBG("%s ep %p\n", __func__, ep);
 118        if (timer_pending(&ep->timer)) {
 119                PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
 120                del_timer_sync(&ep->timer);
 121        } else
 122                get_ep(&ep->com);
 123        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
 124        ep->timer.data = (unsigned long)ep;
 125        ep->timer.function = ep_timeout;
 126        add_timer(&ep->timer);
 127}
 128
 129static void stop_ep_timer(struct iwch_ep *ep)
 130{
 131        PDBG("%s ep %p\n", __func__, ep);
 132        if (!timer_pending(&ep->timer)) {
  133                printk(KERN_ERR "%s timer stopped when it's not running!  ep %p state %u\n",
 134                        __func__, ep, ep->com.state);
 135                WARN_ON(1);
 136                return;
 137        }
 138        del_timer_sync(&ep->timer);
 139        put_ep(&ep->com);
 140}
 141
 142int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
 143{
 144        int     error = 0;
 145        struct cxio_rdev *rdev;
 146
 147        rdev = (struct cxio_rdev *)tdev->ulp;
 148        if (cxio_fatal_error(rdev)) {
 149                kfree_skb(skb);
 150                return -EIO;
 151        }
 152        error = l2t_send(tdev, skb, l2e);
 153        if (error)
 154                kfree_skb(skb);
 155        return error;
 156}
 157
 158int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
 159{
 160        int     error = 0;
 161        struct cxio_rdev *rdev;
 162
 163        rdev = (struct cxio_rdev *)tdev->ulp;
 164        if (cxio_fatal_error(rdev)) {
 165                kfree_skb(skb);
 166                return -EIO;
 167        }
 168        error = cxgb3_ofld_send(tdev, skb);
 169        if (error)
 170                kfree_skb(skb);
 171        return error;
 172}
 173
 174static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
 175{
 176        struct cpl_tid_release *req;
 177
 178        skb = get_skb(skb, sizeof *req, GFP_KERNEL);
 179        if (!skb)
 180                return;
 181        req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
 182        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 183        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
 184        skb->priority = CPL_PRIORITY_SETUP;
 185        iwch_cxgb3_ofld_send(tdev, skb);
 186        return;
 187}
 188
 189int iwch_quiesce_tid(struct iwch_ep *ep)
 190{
 191        struct cpl_set_tcb_field *req;
 192        struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 193
 194        if (!skb)
 195                return -ENOMEM;
 196        req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
 197        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 198        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 199        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
 200        req->reply = 0;
 201        req->cpu_idx = 0;
 202        req->word = htons(W_TCB_RX_QUIESCE);
 203        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
 204        req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
 205
 206        skb->priority = CPL_PRIORITY_DATA;
 207        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 208}
 209
 210int iwch_resume_tid(struct iwch_ep *ep)
 211{
 212        struct cpl_set_tcb_field *req;
 213        struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 214
 215        if (!skb)
 216                return -ENOMEM;
 217        req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
 218        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 219        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 220        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
 221        req->reply = 0;
 222        req->cpu_idx = 0;
 223        req->word = htons(W_TCB_RX_QUIESCE);
 224        req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
 225        req->val = 0;
 226
 227        skb->priority = CPL_PRIORITY_DATA;
 228        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 229}
 230
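/*
 * Derive the effective MSS from the negotiated TCP options: look up the
 * adapter MTU table entry selected by the MSS index, subtract 40 bytes of
 * IP/TCP headers (and 12 more if timestamps were negotiated), and clamp
 * to a 128-byte minimum.
 */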
 231static void set_emss(struct iwch_ep *ep, u16 opt)
 232{
 233        PDBG("%s ep %p opt %u\n", __func__, ep, opt);
 234        ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
 235        if (G_TCPOPT_TSTAMP(opt))
 236                ep->emss -= 12;
 237        if (ep->emss < 128)
 238                ep->emss = 128;
 239        PDBG("emss=%d\n", ep->emss);
 240}
 241
 242static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
 243{
 244        unsigned long flags;
 245        enum iwch_ep_state state;
 246
 247        spin_lock_irqsave(&epc->lock, flags);
 248        state = epc->state;
 249        spin_unlock_irqrestore(&epc->lock, flags);
 250        return state;
 251}
 252
 253static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
 254{
 255        epc->state = new;
 256}
 257
 258static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
 259{
 260        unsigned long flags;
 261
 262        spin_lock_irqsave(&epc->lock, flags);
 263        PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
 264        __state_set(epc, new);
 265        spin_unlock_irqrestore(&epc->lock, flags);
 266        return;
 267}
 268
 269static void *alloc_ep(int size, gfp_t gfp)
 270{
 271        struct iwch_ep_common *epc;
 272
 273        epc = kzalloc(size, gfp);
 274        if (epc) {
 275                kref_init(&epc->kref);
 276                spin_lock_init(&epc->lock);
 277                init_waitqueue_head(&epc->waitq);
 278        }
 279        PDBG("%s alloc ep %p\n", __func__, epc);
 280        return epc;
 281}
 282
 283void __free_ep(struct kref *kref)
 284{
 285        struct iwch_ep *ep;
 286        ep = container_of(container_of(kref, struct iwch_ep_common, kref),
 287                          struct iwch_ep, com);
 288        PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
 289        if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 290                cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
 291                dst_release(ep->dst);
 292                l2t_release(L2DATA(ep->com.tdev), ep->l2t);
 293        }
 294        kfree(ep);
 295}
 296
 297static void release_ep_resources(struct iwch_ep *ep)
 298{
 299        PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 300        set_bit(RELEASE_RESOURCES, &ep->com.flags);
 301        put_ep(&ep->com);
 302}
 303
 304static void process_work(struct work_struct *work)
 305{
 306        struct sk_buff *skb = NULL;
 307        void *ep;
 308        struct t3cdev *tdev;
 309        int ret;
 310
 311        while ((skb = skb_dequeue(&rxq))) {
 312                ep = *((void **) (skb->cb));
 313                tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
 314                ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
 315                if (ret & CPL_RET_BUF_DONE)
 316                        kfree_skb(skb);
 317
 318                /*
 319                 * ep was referenced in sched(), and is freed here.
 320                 */
 321                put_ep((struct iwch_ep_common *)ep);
 322        }
 323}
 324
 325static int status2errno(int status)
 326{
 327        switch (status) {
 328        case CPL_ERR_NONE:
 329                return 0;
 330        case CPL_ERR_CONN_RESET:
 331                return -ECONNRESET;
 332        case CPL_ERR_ARP_MISS:
 333                return -EHOSTUNREACH;
 334        case CPL_ERR_CONN_TIMEDOUT:
 335                return -ETIMEDOUT;
 336        case CPL_ERR_TCAM_FULL:
 337                return -ENOMEM;
 338        case CPL_ERR_CONN_EXIST:
 339                return -EADDRINUSE;
 340        default:
 341                return -EIO;
 342        }
 343}
 344
 345/*
 346 * Try and reuse skbs already allocated...
 347 */
 348static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 349{
 350        if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
 351                skb_trim(skb, 0);
 352                skb_get(skb);
 353        } else {
 354                skb = alloc_skb(len, gfp);
 355        }
 356        return skb;
 357}
 358
 359static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
 360                                 __be32 peer_ip, __be16 local_port,
 361                                 __be16 peer_port, u8 tos)
 362{
 363        struct rtable *rt;
 364        struct flowi fl = {
 365                .oif = 0,
 366                .nl_u = {
 367                         .ip4_u = {
 368                                   .daddr = peer_ip,
 369                                   .saddr = local_ip,
 370                                   .tos = tos}
 371                         },
 372                .proto = IPPROTO_TCP,
 373                .uli_u = {
 374                          .ports = {
 375                                    .sport = local_port,
 376                                    .dport = peer_port}
 377                          }
 378        };
 379
 380        if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0))
 381                return NULL;
 382        return rt;
 383}
 384
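/*
 * Return the index of the largest adapter MTU table entry that does not
 * exceed the given path MTU.
 */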
 385static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
 386{
 387        int i = 0;
 388
 389        while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
 390                ++i;
 391        return i;
 392}
 393
 394static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
 395{
 396        PDBG("%s t3cdev %p\n", __func__, dev);
 397        kfree_skb(skb);
 398}
 399
 400/*
 401 * Handle an ARP failure for an active open.
 402 */
 403static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
 404{
  405        printk(KERN_ERR MOD "ARP failure during connect\n");
 406        kfree_skb(skb);
 407}
 408
 409/*
 410 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 411 * and send it along.
 412 */
 413static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
 414{
 415        struct cpl_abort_req *req = cplhdr(skb);
 416
 417        PDBG("%s t3cdev %p\n", __func__, dev);
 418        req->cmd = CPL_ABORT_NO_RST;
 419        iwch_cxgb3_ofld_send(dev, skb);
 420}
 421
 422static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
 423{
 424        struct cpl_close_con_req *req;
 425        struct sk_buff *skb;
 426
 427        PDBG("%s ep %p\n", __func__, ep);
 428        skb = get_skb(NULL, sizeof(*req), gfp);
 429        if (!skb) {
 430                printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
 431                return -ENOMEM;
 432        }
 433        skb->priority = CPL_PRIORITY_DATA;
 434        set_arp_failure_handler(skb, arp_failure_discard);
 435        req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
 436        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
 437        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 438        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
 439        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 440}
 441
 442static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
 443{
 444        struct cpl_abort_req *req;
 445
 446        PDBG("%s ep %p\n", __func__, ep);
 447        skb = get_skb(skb, sizeof(*req), gfp);
 448        if (!skb) {
 449                printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
 450                       __func__);
 451                return -ENOMEM;
 452        }
 453        skb->priority = CPL_PRIORITY_DATA;
 454        set_arp_failure_handler(skb, abort_arp_failure);
 455        req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
 456        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
 457        req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 458        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
 459        req->cmd = CPL_ABORT_SEND_RST;
 460        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 461}
 462
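/*
 * Build and send the CPL_ACT_OPEN_REQ that starts an active open: the
 * window scale, MSS index, L2T entry and congestion-control flavor are
 * encoded into opt0/opt2 along with the endpoint's local and remote
 * addresses.
 */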
 463static int send_connect(struct iwch_ep *ep)
 464{
 465        struct cpl_act_open_req *req;
 466        struct sk_buff *skb;
 467        u32 opt0h, opt0l, opt2;
 468        unsigned int mtu_idx;
 469        int wscale;
 470
 471        PDBG("%s ep %p\n", __func__, ep);
 472
 473        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 474        if (!skb) {
 475                printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
 476                       __func__);
 477                return -ENOMEM;
 478        }
 479        mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
 480        wscale = compute_wscale(rcv_win);
 481        opt0h = V_NAGLE(0) |
 482            V_NO_CONG(nocong) |
 483            V_KEEP_ALIVE(1) |
 484            F_TCAM_BYPASS |
 485            V_WND_SCALE(wscale) |
 486            V_MSS_IDX(mtu_idx) |
 487            V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
 488        opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
 489        opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
 490        skb->priority = CPL_PRIORITY_SETUP;
 491        set_arp_failure_handler(skb, act_open_req_arp_failure);
 492
 493        req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
 494        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 495        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
 496        req->local_port = ep->com.local_addr.sin_port;
 497        req->peer_port = ep->com.remote_addr.sin_port;
 498        req->local_ip = ep->com.local_addr.sin_addr.s_addr;
 499        req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
 500        req->opt0h = htonl(opt0h);
 501        req->opt0l = htonl(opt0l);
 502        req->params = 0;
 503        req->opt2 = htonl(opt2);
 504        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 505}
 506
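/*
 * Send the MPA start request as streaming TX data: the MPA header and any
 * private data are placed in the skb and a tx_data_wr is pushed in front
 * of them before handing the skb to the L2T send path.
 */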
 507static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 508{
 509        int mpalen;
 510        struct tx_data_wr *req;
 511        struct mpa_message *mpa;
 512        int len;
 513
 514        PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
 515
 516        BUG_ON(skb_cloned(skb));
 517
 518        mpalen = sizeof(*mpa) + ep->plen;
 519        if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
 520                kfree_skb(skb);
 521                skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
 522                if (!skb) {
 523                        connect_reply_upcall(ep, -ENOMEM);
 524                        return;
 525                }
 526        }
 527        skb_trim(skb, 0);
 528        skb_reserve(skb, sizeof(*req));
 529        skb_put(skb, mpalen);
 530        skb->priority = CPL_PRIORITY_DATA;
 531        mpa = (struct mpa_message *) skb->data;
 532        memset(mpa, 0, sizeof(*mpa));
 533        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 534        mpa->flags = (crc_enabled ? MPA_CRC : 0) |
 535                     (markers_enabled ? MPA_MARKERS : 0);
 536        mpa->private_data_size = htons(ep->plen);
 537        mpa->revision = mpa_rev;
 538
 539        if (ep->plen)
 540                memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
 541
 542        /*
 543         * Reference the mpa skb.  This ensures the data area
 544         * will remain in memory until the hw acks the tx.
 545         * Function tx_ack() will deref it.
 546         */
 547        skb_get(skb);
 548        set_arp_failure_handler(skb, arp_failure_discard);
 549        skb_reset_transport_header(skb);
 550        len = skb->len;
 551        req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 552        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 553        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 554        req->len = htonl(len);
 555        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 556                           V_TX_SNDBUF(snd_win>>15));
 557        req->flags = htonl(F_TX_INIT);
 558        req->sndseq = htonl(ep->snd_seq);
 559        BUG_ON(ep->mpa_skb);
 560        ep->mpa_skb = skb;
 561        iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 562        start_ep_timer(ep);
 563        state_set(&ep->com, MPA_REQ_SENT);
 564        return;
 565}
 566
 567static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 568{
 569        int mpalen;
 570        struct tx_data_wr *req;
 571        struct mpa_message *mpa;
 572        struct sk_buff *skb;
 573
 574        PDBG("%s ep %p plen %d\n", __func__, ep, plen);
 575
 576        mpalen = sizeof(*mpa) + plen;
 577
 578        skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
 579        if (!skb) {
 580                printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
 581                return -ENOMEM;
 582        }
 583        skb_reserve(skb, sizeof(*req));
 584        mpa = (struct mpa_message *) skb_put(skb, mpalen);
 585        memset(mpa, 0, sizeof(*mpa));
 586        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 587        mpa->flags = MPA_REJECT;
 588        mpa->revision = mpa_rev;
 589        mpa->private_data_size = htons(plen);
 590        if (plen)
 591                memcpy(mpa->private_data, pdata, plen);
 592
 593        /*
 594         * Reference the mpa skb again.  This ensures the data area
 595         * will remain in memory until the hw acks the tx.
 596         * Function tx_ack() will deref it.
 597         */
 598        skb_get(skb);
 599        skb->priority = CPL_PRIORITY_DATA;
 600        set_arp_failure_handler(skb, arp_failure_discard);
 601        skb_reset_transport_header(skb);
 602        req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 603        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 604        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 605        req->len = htonl(mpalen);
 606        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 607                           V_TX_SNDBUF(snd_win>>15));
 608        req->flags = htonl(F_TX_INIT);
 609        req->sndseq = htonl(ep->snd_seq);
 610        BUG_ON(ep->mpa_skb);
 611        ep->mpa_skb = skb;
 612        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 613}
 614
 615static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 616{
 617        int mpalen;
 618        struct tx_data_wr *req;
 619        struct mpa_message *mpa;
 620        int len;
 621        struct sk_buff *skb;
 622
 623        PDBG("%s ep %p plen %d\n", __func__, ep, plen);
 624
 625        mpalen = sizeof(*mpa) + plen;
 626
 627        skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
 628        if (!skb) {
 629                printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
 630                return -ENOMEM;
 631        }
 632        skb->priority = CPL_PRIORITY_DATA;
 633        skb_reserve(skb, sizeof(*req));
 634        mpa = (struct mpa_message *) skb_put(skb, mpalen);
 635        memset(mpa, 0, sizeof(*mpa));
 636        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 637        mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
 638                     (markers_enabled ? MPA_MARKERS : 0);
 639        mpa->revision = mpa_rev;
 640        mpa->private_data_size = htons(plen);
 641        if (plen)
 642                memcpy(mpa->private_data, pdata, plen);
 643
 644        /*
 645         * Reference the mpa skb.  This ensures the data area
 646         * will remain in memory until the hw acks the tx.
 647         * Function tx_ack() will deref it.
 648         */
 649        skb_get(skb);
 650        set_arp_failure_handler(skb, arp_failure_discard);
 651        skb_reset_transport_header(skb);
 652        len = skb->len;
 653        req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 654        req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 655        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 656        req->len = htonl(len);
 657        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 658                           V_TX_SNDBUF(snd_win>>15));
 659        req->flags = htonl(F_TX_INIT);
 660        req->sndseq = htonl(ep->snd_seq);
 661        ep->mpa_skb = skb;
 662        state_set(&ep->com, MPA_REP_SENT);
 663        return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 664}
 665
 666static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 667{
 668        struct iwch_ep *ep = ctx;
 669        struct cpl_act_establish *req = cplhdr(skb);
 670        unsigned int tid = GET_TID(req);
 671
 672        PDBG("%s ep %p tid %d\n", __func__, ep, tid);
 673
 674        dst_confirm(ep->dst);
 675
 676        /* setup the hwtid for this connection */
 677        ep->hwtid = tid;
 678        cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
 679
 680        ep->snd_seq = ntohl(req->snd_isn);
 681        ep->rcv_seq = ntohl(req->rcv_isn);
 682
 683        set_emss(ep, ntohs(req->tcp_opt));
 684
 685        /* dealloc the atid */
 686        cxgb3_free_atid(ep->com.tdev, ep->atid);
 687
 688        /* start MPA negotiation */
 689        send_mpa_req(ep, skb);
 690
 691        return 0;
 692}
 693
 694static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
 695{
  696        PDBG("%s ep %p\n", __func__, ep);
 697        state_set(&ep->com, ABORTING);
 698        send_abort(ep, skb, gfp);
 699}
 700
 701static void close_complete_upcall(struct iwch_ep *ep)
 702{
 703        struct iw_cm_event event;
 704
 705        PDBG("%s ep %p\n", __func__, ep);
 706        memset(&event, 0, sizeof(event));
 707        event.event = IW_CM_EVENT_CLOSE;
 708        if (ep->com.cm_id) {
 709                PDBG("close complete delivered ep %p cm_id %p tid %d\n",
 710                     ep, ep->com.cm_id, ep->hwtid);
 711                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 712                ep->com.cm_id->rem_ref(ep->com.cm_id);
 713                ep->com.cm_id = NULL;
 714                ep->com.qp = NULL;
 715        }
 716}
 717
 718static void peer_close_upcall(struct iwch_ep *ep)
 719{
 720        struct iw_cm_event event;
 721
 722        PDBG("%s ep %p\n", __func__, ep);
 723        memset(&event, 0, sizeof(event));
 724        event.event = IW_CM_EVENT_DISCONNECT;
 725        if (ep->com.cm_id) {
 726                PDBG("peer close delivered ep %p cm_id %p tid %d\n",
 727                     ep, ep->com.cm_id, ep->hwtid);
 728                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 729        }
 730}
 731
 732static void peer_abort_upcall(struct iwch_ep *ep)
 733{
 734        struct iw_cm_event event;
 735
 736        PDBG("%s ep %p\n", __func__, ep);
 737        memset(&event, 0, sizeof(event));
 738        event.event = IW_CM_EVENT_CLOSE;
 739        event.status = -ECONNRESET;
 740        if (ep->com.cm_id) {
 741                PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
 742                     ep->com.cm_id, ep->hwtid);
 743                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 744                ep->com.cm_id->rem_ref(ep->com.cm_id);
 745                ep->com.cm_id = NULL;
 746                ep->com.qp = NULL;
 747        }
 748}
 749
 750static void connect_reply_upcall(struct iwch_ep *ep, int status)
 751{
 752        struct iw_cm_event event;
 753
 754        PDBG("%s ep %p status %d\n", __func__, ep, status);
 755        memset(&event, 0, sizeof(event));
 756        event.event = IW_CM_EVENT_CONNECT_REPLY;
 757        event.status = status;
 758        event.local_addr = ep->com.local_addr;
 759        event.remote_addr = ep->com.remote_addr;
 760
 761        if ((status == 0) || (status == -ECONNREFUSED)) {
 762                event.private_data_len = ep->plen;
 763                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 764        }
 765        if (ep->com.cm_id) {
 766                PDBG("%s ep %p tid %d status %d\n", __func__, ep,
 767                     ep->hwtid, status);
 768                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 769        }
 770        if (status < 0) {
 771                ep->com.cm_id->rem_ref(ep->com.cm_id);
 772                ep->com.cm_id = NULL;
 773                ep->com.qp = NULL;
 774        }
 775}
 776
 777static void connect_request_upcall(struct iwch_ep *ep)
 778{
 779        struct iw_cm_event event;
 780
 781        PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 782        memset(&event, 0, sizeof(event));
 783        event.event = IW_CM_EVENT_CONNECT_REQUEST;
 784        event.local_addr = ep->com.local_addr;
 785        event.remote_addr = ep->com.remote_addr;
 786        event.private_data_len = ep->plen;
 787        event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 788        event.provider_data = ep;
 789        if (state_read(&ep->parent_ep->com) != DEAD) {
 790                get_ep(&ep->com);
 791                ep->parent_ep->com.cm_id->event_handler(
 792                                                ep->parent_ep->com.cm_id,
 793                                                &event);
 794        }
 795        put_ep(&ep->parent_ep->com);
 796        ep->parent_ep = NULL;
 797}
 798
 799static void established_upcall(struct iwch_ep *ep)
 800{
 801        struct iw_cm_event event;
 802
 803        PDBG("%s ep %p\n", __func__, ep);
 804        memset(&event, 0, sizeof(event));
 805        event.event = IW_CM_EVENT_ESTABLISHED;
 806        if (ep->com.cm_id) {
 807                PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 808                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 809        }
 810}
 811
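/*
 * Return freed receive-window credits to the hardware with a
 * CPL_RX_DATA_ACK so it can re-advertise receive window to the peer.
 */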
 812static int update_rx_credits(struct iwch_ep *ep, u32 credits)
 813{
 814        struct cpl_rx_data_ack *req;
 815        struct sk_buff *skb;
 816
 817        PDBG("%s ep %p credits %u\n", __func__, ep, credits);
 818        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 819        if (!skb) {
 820                printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
 821                return 0;
 822        }
 823
 824        req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
 825        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 826        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
 827        req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
 828        skb->priority = CPL_PRIORITY_ACK;
 829        iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 830        return credits;
 831}
 832
 833static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 834{
 835        struct mpa_message *mpa;
 836        u16 plen;
 837        struct iwch_qp_attributes attrs;
 838        enum iwch_qp_attr_mask mask;
 839        int err;
 840
 841        PDBG("%s ep %p\n", __func__, ep);
 842
 843        /*
 844         * Stop mpa timer.  If it expired, then the state has
 845         * changed and we bail since ep_timeout already aborted
 846         * the connection.
 847         */
 848        stop_ep_timer(ep);
 849        if (state_read(&ep->com) != MPA_REQ_SENT)
 850                return;
 851
 852        /*
 853         * If we get more than the supported amount of private data
 854         * then we must fail this connection.
 855         */
 856        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
 857                err = -EINVAL;
 858                goto err;
 859        }
 860
 861        /*
 862         * copy the new data into our accumulation buffer.
 863         */
 864        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
 865                                  skb->len);
 866        ep->mpa_pkt_len += skb->len;
 867
 868        /*
 869         * if we don't even have the mpa message, then bail.
 870         */
 871        if (ep->mpa_pkt_len < sizeof(*mpa))
 872                return;
 873        mpa = (struct mpa_message *) ep->mpa_pkt;
 874
 875        /* Validate MPA header. */
 876        if (mpa->revision != mpa_rev) {
 877                err = -EPROTO;
 878                goto err;
 879        }
 880        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
 881                err = -EPROTO;
 882                goto err;
 883        }
 884
 885        plen = ntohs(mpa->private_data_size);
 886
 887        /*
 888         * Fail if there's too much private data.
 889         */
 890        if (plen > MPA_MAX_PRIVATE_DATA) {
 891                err = -EPROTO;
 892                goto err;
 893        }
 894
 895        /*
 896         * If plen does not account for pkt size
 897         */
 898        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
 899                err = -EPROTO;
 900                goto err;
 901        }
 902
 903        ep->plen = (u8) plen;
 904
 905        /*
 906         * If we don't have all the pdata yet, then bail.
  907         * We'll continue processing when more data arrives.
 908         */
 909        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
 910                return;
 911
 912        if (mpa->flags & MPA_REJECT) {
 913                err = -ECONNREFUSED;
 914                goto err;
 915        }
 916
 917        /*
 918         * If we get here we have accumulated the entire mpa
 919         * start reply message including private data. And
 920         * the MPA header is valid.
 921         */
 922        state_set(&ep->com, FPDU_MODE);
 923        ep->mpa_attr.initiator = 1;
 924        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 925        ep->mpa_attr.recv_marker_enabled = markers_enabled;
 926        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 927        ep->mpa_attr.version = mpa_rev;
 928        PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
 929             "xmit_marker_enabled=%d, version=%d\n", __func__,
 930             ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 931             ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
 932
 933        attrs.mpa_attr = ep->mpa_attr;
 934        attrs.max_ird = ep->ird;
 935        attrs.max_ord = ep->ord;
 936        attrs.llp_stream_handle = ep;
 937        attrs.next_state = IWCH_QP_STATE_RTS;
 938
 939        mask = IWCH_QP_ATTR_NEXT_STATE |
 940            IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
 941            IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
 942
 943        /* bind QP and TID with INIT_WR */
 944        err = iwch_modify_qp(ep->com.qp->rhp,
 945                             ep->com.qp, mask, &attrs, 1);
 946        if (err)
 947                goto err;
 948
 949        if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
 950                iwch_post_zb_read(ep->com.qp);
 951        }
 952
 953        goto out;
 954err:
 955        abort_connection(ep, skb, GFP_KERNEL);
 956out:
 957        connect_reply_upcall(ep, err);
 958        return;
 959}
 960
 961static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
 962{
 963        struct mpa_message *mpa;
 964        u16 plen;
 965
 966        PDBG("%s ep %p\n", __func__, ep);
 967
 968        /*
 969         * Stop mpa timer.  If it expired, then the state has
 970         * changed and we bail since ep_timeout already aborted
 971         * the connection.
 972         */
 973        stop_ep_timer(ep);
 974        if (state_read(&ep->com) != MPA_REQ_WAIT)
 975                return;
 976
 977        /*
 978         * If we get more than the supported amount of private data
 979         * then we must fail this connection.
 980         */
 981        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
 982                abort_connection(ep, skb, GFP_KERNEL);
 983                return;
 984        }
 985
 986        PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
 987
 988        /*
 989         * Copy the new data into our accumulation buffer.
 990         */
 991        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
 992                                  skb->len);
 993        ep->mpa_pkt_len += skb->len;
 994
 995        /*
 996         * If we don't even have the mpa message, then bail.
  997         * We'll continue processing when more data arrives.
 998         */
 999        if (ep->mpa_pkt_len < sizeof(*mpa))
1000                return;
1001        PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1002        mpa = (struct mpa_message *) ep->mpa_pkt;
1003
1004        /*
1005         * Validate MPA Header.
1006         */
1007        if (mpa->revision != mpa_rev) {
1008                abort_connection(ep, skb, GFP_KERNEL);
1009                return;
1010        }
1011
1012        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
1013                abort_connection(ep, skb, GFP_KERNEL);
1014                return;
1015        }
1016
1017        plen = ntohs(mpa->private_data_size);
1018
1019        /*
1020         * Fail if there's too much private data.
1021         */
1022        if (plen > MPA_MAX_PRIVATE_DATA) {
1023                abort_connection(ep, skb, GFP_KERNEL);
1024                return;
1025        }
1026
1027        /*
1028         * If plen does not account for pkt size
1029         */
1030        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1031                abort_connection(ep, skb, GFP_KERNEL);
1032                return;
1033        }
1034        ep->plen = (u8) plen;
1035
1036        /*
1037         * If we don't have all the pdata yet, then bail.
1038         */
1039        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1040                return;
1041
1042        /*
1043         * If we get here we have accumulated the entire mpa
 1044         * start request message including private data.
1045         */
1046        ep->mpa_attr.initiator = 0;
1047        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1048        ep->mpa_attr.recv_marker_enabled = markers_enabled;
1049        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1050        ep->mpa_attr.version = mpa_rev;
1051        PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1052             "xmit_marker_enabled=%d, version=%d\n", __func__,
1053             ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1054             ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
1055
1056        state_set(&ep->com, MPA_REQ_RCVD);
1057
1058        /* drive upcall */
1059        connect_request_upcall(ep);
1060        return;
1061}
1062
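/*
 * Streaming-mode receive: data arriving before the connection reaches
 * FPDU mode is MPA negotiation traffic and is fed to the reply/request
 * parsers according to the current endpoint state.
 */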
1063static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1064{
1065        struct iwch_ep *ep = ctx;
1066        struct cpl_rx_data *hdr = cplhdr(skb);
1067        unsigned int dlen = ntohs(hdr->len);
1068
1069        PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
1070
1071        skb_pull(skb, sizeof(*hdr));
1072        skb_trim(skb, dlen);
1073
1074        ep->rcv_seq += dlen;
1075        BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
1076
1077        switch (state_read(&ep->com)) {
1078        case MPA_REQ_SENT:
1079                process_mpa_reply(ep, skb);
1080                break;
1081        case MPA_REQ_WAIT:
1082                process_mpa_request(ep, skb);
1083                break;
1084        case MPA_REP_SENT:
1085                break;
1086        default:
1087                printk(KERN_ERR MOD "%s Unexpected streaming data."
1088                       " ep %p state %d tid %d\n",
1089                       __func__, ep, state_read(&ep->com), ep->hwtid);
1090
1091                /*
1092                 * The ep will timeout and inform the ULP of the failure.
1093                 * See ep_timeout().
1094                 */
1095                break;
1096        }
1097
1098        /* update RX credits */
1099        update_rx_credits(ep, dlen);
1100
1101        return CPL_RET_BUF_DONE;
1102}
1103
1104/*
1105 * Upcall from the adapter indicating data has been transmitted.
1106 * For us its just the single MPA request or reply.  We can now free
1107 * the skb holding the mpa message.
1108 */
1109static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1110{
1111        struct iwch_ep *ep = ctx;
1112        struct cpl_wr_ack *hdr = cplhdr(skb);
1113        unsigned int credits = ntohs(hdr->credits);
1114
1115        PDBG("%s ep %p credits %u\n", __func__, ep, credits);
1116
1117        if (credits == 0) {
1118                PDBG(KERN_ERR "%s 0 credit ack  ep %p state %u\n",
1119                        __func__, ep, state_read(&ep->com));
1120                return CPL_RET_BUF_DONE;
1121        }
1122
1123        BUG_ON(credits != 1);
1124        dst_confirm(ep->dst);
1125        if (!ep->mpa_skb) {
1126                PDBG("%s rdma_init wr_ack ep %p state %u\n",
1127                        __func__, ep, state_read(&ep->com));
1128                if (ep->mpa_attr.initiator) {
1129                        PDBG("%s initiator ep %p state %u\n",
1130                                __func__, ep, state_read(&ep->com));
1131                        if (peer2peer)
1132                                iwch_post_zb_read(ep->com.qp);
1133                } else {
1134                        PDBG("%s responder ep %p state %u\n",
1135                                __func__, ep, state_read(&ep->com));
1136                        ep->com.rpl_done = 1;
1137                        wake_up(&ep->com.waitq);
1138                }
1139        } else {
1140                PDBG("%s lsm ack ep %p state %u freeing skb\n",
1141                        __func__, ep, state_read(&ep->com));
1142                kfree_skb(ep->mpa_skb);
1143                ep->mpa_skb = NULL;
1144        }
1145        return CPL_RET_BUF_DONE;
1146}
1147
1148static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1149{
1150        struct iwch_ep *ep = ctx;
1151        unsigned long flags;
1152        int release = 0;
1153
1154        PDBG("%s ep %p\n", __func__, ep);
1155        BUG_ON(!ep);
1156
1157        /*
1158         * We get 2 abort replies from the HW.  The first one must
1159         * be ignored except for scribbling that we need one more.
1160         */
1161        if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
1162                return CPL_RET_BUF_DONE;
1163        }
1164
1165        spin_lock_irqsave(&ep->com.lock, flags);
1166        switch (ep->com.state) {
1167        case ABORTING:
1168                close_complete_upcall(ep);
1169                __state_set(&ep->com, DEAD);
1170                release = 1;
1171                break;
1172        default:
1173                printk(KERN_ERR "%s ep %p state %d\n",
1174                     __func__, ep, ep->com.state);
1175                break;
1176        }
1177        spin_unlock_irqrestore(&ep->com.lock, flags);
1178
1179        if (release)
1180                release_ep_resources(ep);
1181        return CPL_RET_BUF_DONE;
1182}
1183
1184/*
1185 * Return whether a failed active open has allocated a TID
1186 */
1187static inline int act_open_has_tid(int status)
1188{
1189        return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1190               status != CPL_ERR_ARP_MISS;
1191}
1192
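/*
 * The active open failed: report the error to the ULP and release the
 * tid/atid, route and L2T resources.
 */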
1193static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1194{
1195        struct iwch_ep *ep = ctx;
1196        struct cpl_act_open_rpl *rpl = cplhdr(skb);
1197
1198        PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
1199             status2errno(rpl->status));
1200        connect_reply_upcall(ep, status2errno(rpl->status));
1201        state_set(&ep->com, DEAD);
1202        if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
1203                release_tid(ep->com.tdev, GET_TID(rpl), NULL);
1204        cxgb3_free_atid(ep->com.tdev, ep->atid);
1205        dst_release(ep->dst);
1206        l2t_release(L2DATA(ep->com.tdev), ep->l2t);
1207        put_ep(&ep->com);
1208        return CPL_RET_BUF_DONE;
1209}
1210
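/*
 * Post a CPL_PASS_OPEN_REQ so the hardware starts accepting SYNs on the
 * listening endpoint's local address and port (identified by its stid).
 */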
1211static int listen_start(struct iwch_listen_ep *ep)
1212{
1213        struct sk_buff *skb;
1214        struct cpl_pass_open_req *req;
1215
1216        PDBG("%s ep %p\n", __func__, ep);
1217        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1218        if (!skb) {
1219                printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
1220                return -ENOMEM;
1221        }
1222
1223        req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
1224        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1225        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
1226        req->local_port = ep->com.local_addr.sin_port;
1227        req->local_ip = ep->com.local_addr.sin_addr.s_addr;
1228        req->peer_port = 0;
1229        req->peer_ip = 0;
1230        req->peer_netmask = 0;
1231        req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
1232        req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
1233        req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
1234
1235        skb->priority = 1;
1236        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
1237}
1238
1239static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1240{
1241        struct iwch_listen_ep *ep = ctx;
1242        struct cpl_pass_open_rpl *rpl = cplhdr(skb);
1243
1244        PDBG("%s ep %p status %d error %d\n", __func__, ep,
1245             rpl->status, status2errno(rpl->status));
1246        ep->com.rpl_err = status2errno(rpl->status);
1247        ep->com.rpl_done = 1;
1248        wake_up(&ep->com.waitq);
1249
1250        return CPL_RET_BUF_DONE;
1251}
1252
1253static int listen_stop(struct iwch_listen_ep *ep)
1254{
1255        struct sk_buff *skb;
1256        struct cpl_close_listserv_req *req;
1257
1258        PDBG("%s ep %p\n", __func__, ep);
1259        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1260        if (!skb) {
1261                printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
1262                return -ENOMEM;
1263        }
1264        req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
1265        req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1266        req->cpu_idx = 0;
1267        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
1268        skb->priority = 1;
1269        return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
1270}
1271
1272static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
1273                             void *ctx)
1274{
1275        struct iwch_listen_ep *ep = ctx;
1276        struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
1277
1278        PDBG("%s ep %p\n", __func__, ep);
1279        ep->com.rpl_err = status2errno(rpl->status);
1280        ep->com.rpl_done = 1;
1281        wake_up(&ep->com.waitq);
1282        return CPL_RET_BUF_DONE;
1283}
1284
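/*
 * Accept an incoming connection request by reusing the PASS_ACCEPT_REQ
 * skb to send a CPL_PASS_ACCEPT_RPL carrying our TCP options.
 */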
1285static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
1286{
1287        struct cpl_pass_accept_rpl *rpl;
1288        unsigned int mtu_idx;
1289        u32 opt0h, opt0l, opt2;
1290        int wscale;
1291
1292        PDBG("%s ep %p\n", __func__, ep);
1293        BUG_ON(skb_cloned(skb));
1294        skb_trim(skb, sizeof(*rpl));
1295        skb_get(skb);
1296        mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
1297        wscale = compute_wscale(rcv_win);
1298        opt0h = V_NAGLE(0) |
1299            V_NO_CONG(nocong) |
1300            V_KEEP_ALIVE(1) |
1301            F_TCAM_BYPASS |
1302            V_WND_SCALE(wscale) |
1303            V_MSS_IDX(mtu_idx) |
1304            V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
1305        opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
1306        opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
1307
1308        rpl = cplhdr(skb);
1309        rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1310        OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
1311        rpl->peer_ip = peer_ip;
1312        rpl->opt0h = htonl(opt0h);
1313        rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
1314        rpl->opt2 = htonl(opt2);
1315        rpl->rsvd = rpl->opt2;  /* workaround for HW bug */
1316        skb->priority = CPL_PRIORITY_SETUP;
1317        iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
1318
1319        return;
1320}
1321
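/*
 * Reject an incoming connection request.  On T3A the hardware needs an
 * explicit reject reply; otherwise releasing the tid is sufficient.
 */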
1322static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
1323                      struct sk_buff *skb)
1324{
1325        PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
1326             peer_ip);
1327        BUG_ON(skb_cloned(skb));
1328        skb_trim(skb, sizeof(struct cpl_tid_release));
1329        skb_get(skb);
1330
1331        if (tdev->type != T3A)
1332                release_tid(tdev, hwtid, skb);
1333        else {
1334                struct cpl_pass_accept_rpl *rpl;
1335
1336                rpl = cplhdr(skb);
1337                skb->priority = CPL_PRIORITY_SETUP;
1338                rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1339                OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
1340                                                      hwtid));
1341                rpl->peer_ip = peer_ip;
1342                rpl->opt0h = htonl(F_TCAM_BYPASS);
1343                rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
1344                rpl->opt2 = 0;
1345                rpl->rsvd = rpl->opt2;
1346                iwch_cxgb3_ofld_send(tdev, skb);
1347        }
1348}
1349
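/*
 * An incoming SYN matched one of our listening servers.  Resolve the
 * ingress netdev, route and L2T entry, allocate a child endpoint bound to
 * the new hardware tid, and either accept or reject the connection.
 */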
1350static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1351{
1352        struct iwch_ep *child_ep, *parent_ep = ctx;
1353        struct cpl_pass_accept_req *req = cplhdr(skb);
1354        unsigned int hwtid = GET_TID(req);
1355        struct dst_entry *dst;
1356        struct l2t_entry *l2t;
1357        struct rtable *rt;
1358        struct iff_mac tim;
1359
1360        PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
1361
1362        if (state_read(&parent_ep->com) != LISTEN) {
1363                printk(KERN_ERR "%s - listening ep not in LISTEN\n",
1364                       __func__);
1365                goto reject;
1366        }
1367
1368        /*
1369         * Find the netdev for this connection request.
1370         */
1371        tim.mac_addr = req->dst_mac;
1372        tim.vlan_tag = ntohs(req->vlan_tag);
1373        if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
1374                printk(KERN_ERR
1375                        "%s bad dst mac %02x %02x %02x %02x %02x %02x\n",
1376                        __func__,
1377                        req->dst_mac[0],
1378                        req->dst_mac[1],
1379                        req->dst_mac[2],
1380                        req->dst_mac[3],
1381                        req->dst_mac[4],
1382                        req->dst_mac[5]);
1383                goto reject;
1384        }
1385
1386        /* Find output route */
1387        rt = find_route(tdev,
1388                        req->local_ip,
1389                        req->peer_ip,
1390                        req->local_port,
1391                        req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
1392        if (!rt) {
1393                printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
1394                       __func__);
1395                goto reject;
1396        }
1397        dst = &rt->u.dst;
1398        l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
1399        if (!l2t) {
1400                printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
1401                       __func__);
1402                dst_release(dst);
1403                goto reject;
1404        }
1405        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
1406        if (!child_ep) {
1407                printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
1408                       __func__);
1409                l2t_release(L2DATA(tdev), l2t);
1410                dst_release(dst);
1411                goto reject;
1412        }
1413        state_set(&child_ep->com, CONNECTING);
1414        child_ep->com.tdev = tdev;
1415        child_ep->com.cm_id = NULL;
1416        child_ep->com.local_addr.sin_family = PF_INET;
1417        child_ep->com.local_addr.sin_port = req->local_port;
1418        child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
1419        child_ep->com.remote_addr.sin_family = PF_INET;
1420        child_ep->com.remote_addr.sin_port = req->peer_port;
1421        child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
1422        get_ep(&parent_ep->com);
1423        child_ep->parent_ep = parent_ep;
1424        child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
1425        child_ep->l2t = l2t;
1426        child_ep->dst = dst;
1427        child_ep->hwtid = hwtid;
1428        init_timer(&child_ep->timer);
1429        cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
1430        accept_cr(child_ep, req->peer_ip, skb);
1431        goto out;
1432reject:
1433        reject_cr(tdev, hwtid, req->peer_ip, skb);
1434out:
1435        return CPL_RET_BUF_DONE;
1436}
1437
1438static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1439{
1440        struct iwch_ep *ep = ctx;
1441        struct cpl_pass_establish *req = cplhdr(skb);
1442
1443        PDBG("%s ep %p\n", __func__, ep);
1444        ep->snd_seq = ntohl(req->snd_isn);
1445        ep->rcv_seq = ntohl(req->rcv_isn);
1446
1447        set_emss(ep, ntohs(req->tcp_opt));
1448
1449        dst_confirm(ep->dst);
1450        state_set(&ep->com, MPA_REQ_WAIT);
1451        start_ep_timer(ep);
1452
1453        return CPL_RET_BUF_DONE;
1454}
1455
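/*
 * The peer sent a FIN.  Advance the close state machine, notifying the
 * ULP and moving the QP towards CLOSING/IDLE as appropriate.
 */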
1456static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1457{
1458        struct iwch_ep *ep = ctx;
1459        struct iwch_qp_attributes attrs;
1460        unsigned long flags;
1461        int disconnect = 1;
1462        int release = 0;
1463
1464        PDBG("%s ep %p\n", __func__, ep);
1465        dst_confirm(ep->dst);
1466
1467        spin_lock_irqsave(&ep->com.lock, flags);
1468        switch (ep->com.state) {
1469        case MPA_REQ_WAIT:
1470                __state_set(&ep->com, CLOSING);
1471                break;
1472        case MPA_REQ_SENT:
1473                __state_set(&ep->com, CLOSING);
1474                connect_reply_upcall(ep, -ECONNRESET);
1475                break;
1476        case MPA_REQ_RCVD:
1477
1478                /*
1479                 * We're gonna mark this puppy DEAD, but keep
1480                 * the reference on it until the ULP accepts or
1481                 * rejects the CR. Also wake up anyone waiting
1482                 * in rdma connection migration (see iwch_accept_cr()).
1483                 */
1484                __state_set(&ep->com, CLOSING);
1485                ep->com.rpl_done = 1;
1486                ep->com.rpl_err = -ECONNRESET;
1487                PDBG("waking up ep %p\n", ep);
1488                wake_up(&ep->com.waitq);
1489                break;
1490        case MPA_REP_SENT:
1491                __state_set(&ep->com, CLOSING);
1492                ep->com.rpl_done = 1;
1493                ep->com.rpl_err = -ECONNRESET;
1494                PDBG("waking up ep %p\n", ep);
1495                wake_up(&ep->com.waitq);
1496                break;
1497        case FPDU_MODE:
1498                start_ep_timer(ep);
1499                __state_set(&ep->com, CLOSING);
1500                attrs.next_state = IWCH_QP_STATE_CLOSING;
1501                iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1502                               IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1503                peer_close_upcall(ep);
1504                break;
1505        case ABORTING:
1506                disconnect = 0;
1507                break;
1508        case CLOSING:
1509                __state_set(&ep->com, MORIBUND);
1510                disconnect = 0;
1511                break;
1512        case MORIBUND:
1513                stop_ep_timer(ep);
1514                if (ep->com.cm_id && ep->com.qp) {
1515                        attrs.next_state = IWCH_QP_STATE_IDLE;
1516                        iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1517                                       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1518                }
1519                close_complete_upcall(ep);
1520                __state_set(&ep->com, DEAD);
1521                release = 1;
1522                disconnect = 0;
1523                break;
1524        case DEAD:
1525                disconnect = 0;
1526                break;
1527        default:
1528                BUG();
1529        }
1530        spin_unlock_irqrestore(&ep->com.lock, flags);
1531        if (disconnect)
1532                iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1533        if (release)
1534                release_ep_resources(ep);
1535        return CPL_RET_BUF_DONE;
1536}
1537
1538/*
1539 * Returns whether an ABORT_REQ_RSS message is a negative advice.
1540 */
1541static int is_neg_adv_abort(unsigned int status)
1542{
1543        return status == CPL_ERR_RTX_NEG_ADVICE ||
1544               status == CPL_ERR_PERSIST_NEG_ADVICE;
1545}
1546
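/*
 * CPL_ABORT_REQ_RSS: the connection was aborted.  Negative advice is
 * ignored (just nudge the L2T entry), the first of the two aborts the HW
 * sends is only recorded, and the second does the real teardown and is
 * answered with an ABORT_RPL (CPL_ABORT_NO_RST) unless the endpoint is
 * already DEAD.
 */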
1547static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1548{
1549        struct cpl_abort_req_rss *req = cplhdr(skb);
1550        struct iwch_ep *ep = ctx;
1551        struct cpl_abort_rpl *rpl;
1552        struct sk_buff *rpl_skb;
1553        struct iwch_qp_attributes attrs;
1554        int ret;
1555        int release = 0;
1556        unsigned long flags;
1557
1558        if (is_neg_adv_abort(req->status)) {
1559                PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
1560                     ep->hwtid);
1561                t3_l2t_send_event(ep->com.tdev, ep->l2t);
1562                return CPL_RET_BUF_DONE;
1563        }
1564
1565        /*
1566         * We get 2 peer aborts from the HW.  The first one must
1567         * be ignored except for scribbling that we need one more.
1568         */
1569        if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
1570                return CPL_RET_BUF_DONE;
1571        }
1572
1573        spin_lock_irqsave(&ep->com.lock, flags);
1574        PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
1575        switch (ep->com.state) {
1576        case CONNECTING:
1577                break;
1578        case MPA_REQ_WAIT:
1579                stop_ep_timer(ep);
1580                break;
1581        case MPA_REQ_SENT:
1582                stop_ep_timer(ep);
1583                connect_reply_upcall(ep, -ECONNRESET);
1584                break;
1585        case MPA_REP_SENT:
1586                ep->com.rpl_done = 1;
1587                ep->com.rpl_err = -ECONNRESET;
1588                PDBG("waking up ep %p\n", ep);
1589                wake_up(&ep->com.waitq);
1590                break;
1591        case MPA_REQ_RCVD:
1592
1593                /*
1594                 * We're going to mark this endpoint DEAD, but keep
1595                 * the reference on it until the ULP accepts or
1596                 * rejects the CR. Also wake up anyone waiting
1597                 * in rdma connection migration (see iwch_accept_cr()).
1598                 */
1599                ep->com.rpl_done = 1;
1600                ep->com.rpl_err = -ECONNRESET;
1601                PDBG("waking up ep %p\n", ep);
1602                wake_up(&ep->com.waitq);
1603                break;
1604        case MORIBUND:
1605        case CLOSING:
1606                stop_ep_timer(ep);
1607                /*FALLTHROUGH*/
1608        case FPDU_MODE:
1609                if (ep->com.cm_id && ep->com.qp) {
1610                        attrs.next_state = IWCH_QP_STATE_ERROR;
1611                        ret = iwch_modify_qp(ep->com.qp->rhp,
1612                                     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1613                                     &attrs, 1);
1614                        if (ret)
1615                                printk(KERN_ERR MOD
1616                                       "%s - qp <- error failed!\n",
1617                                       __func__);
1618                }
1619                peer_abort_upcall(ep);
1620                break;
1621        case ABORTING:
1622                break;
1623        case DEAD:
1624                PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
1625                spin_unlock_irqrestore(&ep->com.lock, flags);
1626                return CPL_RET_BUF_DONE;
1627        default:
1628                BUG();
1629                break;
1630        }
1631        dst_confirm(ep->dst);
1632        if (ep->com.state != ABORTING) {
1633                __state_set(&ep->com, DEAD);
1634                release = 1;
1635        }
1636        spin_unlock_irqrestore(&ep->com.lock, flags);
1637
1638        rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
1639        if (!rpl_skb) {
1640                printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
1641                       __func__);
1642                release = 1;
1643                goto out;
1644        }
1645        rpl_skb->priority = CPL_PRIORITY_DATA;
1646        rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
1647        rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
1648        rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
1649        OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
1650        rpl->cmd = CPL_ABORT_NO_RST;
1651        iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
1652out:
1653        if (release)
1654                release_ep_resources(ep);
1655        return CPL_RET_BUF_DONE;
1656}
1657
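/*
 * CPL_CLOSE_CON_RPL: our half-close completed.  If the peer had already
 * closed (MORIBUND) idle the QP, report close complete to the ULP and
 * release the endpoint; otherwise just mark the endpoint MORIBUND.
 */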
1658static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1659{
1660        struct iwch_ep *ep = ctx;
1661        struct iwch_qp_attributes attrs;
1662        unsigned long flags;
1663        int release = 0;
1664
1665        PDBG("%s ep %p\n", __func__, ep);
1666        BUG_ON(!ep);
1667
1668        /* The cm_id may be null if we failed to connect */
1669        spin_lock_irqsave(&ep->com.lock, flags);
1670        switch (ep->com.state) {
1671        case CLOSING:
1672                __state_set(&ep->com, MORIBUND);
1673                break;
1674        case MORIBUND:
1675                stop_ep_timer(ep);
1676                if ((ep->com.cm_id) && (ep->com.qp)) {
1677                        attrs.next_state = IWCH_QP_STATE_IDLE;
1678                        iwch_modify_qp(ep->com.qp->rhp,
1679                                             ep->com.qp,
1680                                             IWCH_QP_ATTR_NEXT_STATE,
1681                                             &attrs, 1);
1682                }
1683                close_complete_upcall(ep);
1684                __state_set(&ep->com, DEAD);
1685                release = 1;
1686                break;
1687        case ABORTING:
1688        case DEAD:
1689                break;
1690        default:
1691                BUG();
1692                break;
1693        }
1694        spin_unlock_irqrestore(&ep->com.lock, flags);
1695        if (release)
1696                release_ep_resources(ep);
1697        return CPL_RET_BUF_DONE;
1698}
1699
1700/*
1701 * T3A does 3 things when a TERM is received:
1702 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
1703 * 2) generate an async event on the QP with the TERMINATE opcode
1704 * 3) post a TERMINATE opcode CQE into the associated CQ.
1705 *
1706 * For (1), we save the message in the QP for the consumer to read later.
1707 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
1708 * For (3), we toss the CQE in cxio_poll_cq().
1709 *
1710 * terminate() handles case (1)...
1711 */
1712static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1713{
1714        struct iwch_ep *ep = ctx;
1715
1716        if (state_read(&ep->com) != FPDU_MODE)
1717                return CPL_RET_BUF_DONE;
1718
1719        PDBG("%s ep %p\n", __func__, ep);
1720        skb_pull(skb, sizeof(struct cpl_rdma_terminate));
1721        PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
1722        skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
1723                                  skb->len);
1724        ep->com.qp->attr.terminate_msg_len = skb->len;
1725        ep->com.qp->attr.is_terminate_local = 0;
1726        return CPL_RET_BUF_DONE;
1727}
1728
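/*
 * CPL_RDMA_EC_STATUS: a non-zero status means the graceful close failed in
 * the HW, so move the QP to ERROR and abort the connection.
 */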
1729static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1730{
1731        struct cpl_rdma_ec_status *rep = cplhdr(skb);
1732        struct iwch_ep *ep = ctx;
1733
1734        PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
1735             rep->status);
1736        if (rep->status) {
1737                struct iwch_qp_attributes attrs;
1738
1739                printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
1740                       __func__, ep->hwtid);
1741                stop_ep_timer(ep);
1742                attrs.next_state = IWCH_QP_STATE_ERROR;
1743                iwch_modify_qp(ep->com.qp->rhp,
1744                               ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1745                               &attrs, 1);
1746                abort_connection(ep, NULL, GFP_KERNEL);
1747        }
1748        return CPL_RET_BUF_DONE;
1749}
1750
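/*
 * Endpoint timer: the MPA exchange or orderly close did not finish within
 * ep_timeout_secs.  Fail any pending connect, move the QP to ERROR where
 * needed, and abort the connection.
 */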
1751static void ep_timeout(unsigned long arg)
1752{
1753        struct iwch_ep *ep = (struct iwch_ep *)arg;
1754        struct iwch_qp_attributes attrs;
1755        unsigned long flags;
1756        int abort = 1;
1757
1758        spin_lock_irqsave(&ep->com.lock, flags);
1759        PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
1760             ep->com.state);
1761        switch (ep->com.state) {
1762        case MPA_REQ_SENT:
1763                __state_set(&ep->com, ABORTING);
1764                connect_reply_upcall(ep, -ETIMEDOUT);
1765                break;
1766        case MPA_REQ_WAIT:
1767                __state_set(&ep->com, ABORTING);
1768                break;
1769        case CLOSING:
1770        case MORIBUND:
1771                if (ep->com.cm_id && ep->com.qp) {
1772                        attrs.next_state = IWCH_QP_STATE_ERROR;
1773                        iwch_modify_qp(ep->com.qp->rhp,
1774                                     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1775                                     &attrs, 1);
1776                }
1777                __state_set(&ep->com, ABORTING);
1778                break;
1779        default:
1780                printk(KERN_ERR MOD "%s unexpected state ep %p state %u\n",
1781                        __func__, ep, ep->com.state);
1782                WARN_ON(1);
1783                abort = 0;
1784        }
1785        spin_unlock_irqrestore(&ep->com.lock, flags);
1786        if (abort)
1787                abort_connection(ep, NULL, GFP_ATOMIC);
1788        put_ep(&ep->com);
1789}
1790
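/*
 * The ULP rejected an incoming connection request.  MPA rev 0 peers can
 * only be aborted; otherwise send an MPA reject carrying the ULP's private
 * data and start an orderly disconnect.
 */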
1791int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
1792{
1793        int err;
1794        struct iwch_ep *ep = to_ep(cm_id);
1795        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1796
1797        if (state_read(&ep->com) == DEAD) {
1798                put_ep(&ep->com);
1799                return -ECONNRESET;
1800        }
1801        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1802        if (mpa_rev == 0) {
1803                abort_connection(ep, NULL, GFP_KERNEL);
1804        } else {
1805                err = send_mpa_reject(ep, pdata, pdata_len);
1806                err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1807        }
1808        put_ep(&ep->com);
1809        return 0;
1810}
1811
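/*
 * The ULP accepted an incoming connection request: bind its QP to the
 * endpoint, move the QP to RTS with the negotiated MPA attributes and
 * IRD/ORD limits, then send the MPA reply and enter FPDU_MODE.
 */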
1812int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1813{
1814        int err;
1815        struct iwch_qp_attributes attrs;
1816        enum iwch_qp_attr_mask mask;
1817        struct iwch_ep *ep = to_ep(cm_id);
1818        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1819        struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
1820
1821        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1822        if (state_read(&ep->com) == DEAD) {
1823                err = -ECONNRESET;
1824                goto err;
1825        }
1826
1827        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1828        BUG_ON(!qp);
1829
1830        if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
1831            (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
1832                abort_connection(ep, NULL, GFP_KERNEL);
1833                err = -EINVAL;
1834                goto err;
1835        }
1836
1837        cm_id->add_ref(cm_id);
1838        ep->com.cm_id = cm_id;
1839        ep->com.qp = qp;
1840
1841        ep->ird = conn_param->ird;
1842        ep->ord = conn_param->ord;
1843
1844        if (peer2peer && ep->ird == 0)
1845                ep->ird = 1;
1846
1847        PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
1848
1849        /* bind QP to EP and move to RTS */
1850        attrs.mpa_attr = ep->mpa_attr;
1851        attrs.max_ird = ep->ird;
1852        attrs.max_ord = ep->ord;
1853        attrs.llp_stream_handle = ep;
1854        attrs.next_state = IWCH_QP_STATE_RTS;
1855
1856        /* bind QP and TID with INIT_WR */
1857        mask = IWCH_QP_ATTR_NEXT_STATE |
1858                             IWCH_QP_ATTR_LLP_STREAM_HANDLE |
1859                             IWCH_QP_ATTR_MPA_ATTR |
1860                             IWCH_QP_ATTR_MAX_IRD |
1861                             IWCH_QP_ATTR_MAX_ORD;
1862
1863        err = iwch_modify_qp(ep->com.qp->rhp,
1864                             ep->com.qp, mask, &attrs, 1);
1865        if (err)
1866                goto err1;
1867
1868        /* if needed, wait for wr_ack */
1869        if (iwch_rqes_posted(qp)) {
1870                wait_event(ep->com.waitq, ep->com.rpl_done);
1871                err = ep->com.rpl_err;
1872                if (err)
1873                        goto err1;
1874        }
1875
1876        err = send_mpa_reply(ep, conn_param->private_data,
1877                             conn_param->private_data_len);
1878        if (err)
1879                goto err1;
1880
1881
1882        state_set(&ep->com, FPDU_MODE);
1883        established_upcall(ep);
1884        put_ep(&ep->com);
1885        return 0;
1886err1:
1887        ep->com.cm_id = NULL;
1888        ep->com.qp = NULL;
1889        cm_id->rem_ref(cm_id);
1890err:
1891        put_ep(&ep->com);
1892        return err;
1893}
1894
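/*
 * Returns non-zero if the destination address is local to this host;
 * loopback connections are not supported by this driver.
 */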
1895static int is_loopback_dst(struct iw_cm_id *cm_id)
1896{
1897        struct net_device *dev;
1898
1899        dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr);
1900        if (!dev)
1901                return 0;
1902        dev_put(dev);
1903        return 1;
1904}
1905
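/*
 * Active open: allocate an endpoint and an active TID, resolve a route and
 * an L2T entry to the destination, then send the connect request to the
 * RNIC.  Loopback destinations are rejected with -ENOSYS.
 */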
1906int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1907{
1908        int err = 0;
1909        struct iwch_dev *h = to_iwch_dev(cm_id->device);
1910        struct iwch_ep *ep;
1911        struct rtable *rt;
1912
1913        if (is_loopback_dst(cm_id)) {
1914                err = -ENOSYS;
1915                goto out;
1916        }
1917
1918        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1919        if (!ep) {
1920                printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
1921                err = -ENOMEM;
1922                goto out;
1923        }
1924        init_timer(&ep->timer);
1925        ep->plen = conn_param->private_data_len;
1926        if (ep->plen)
1927                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
1928                       conn_param->private_data, ep->plen);
1929        ep->ird = conn_param->ird;
1930        ep->ord = conn_param->ord;
1931
1932        if (peer2peer && ep->ord == 0)
1933                ep->ord = 1;
1934
1935        ep->com.tdev = h->rdev.t3cdev_p;
1936
1937        cm_id->add_ref(cm_id);
1938        ep->com.cm_id = cm_id;
1939        ep->com.qp = get_qhp(h, conn_param->qpn);
1940        BUG_ON(!ep->com.qp);
1941        PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
1942             ep->com.qp, cm_id);
1943
1944        /*
1945         * Allocate an active TID to initiate a TCP connection.
1946         */
1947        ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
1948        if (ep->atid == -1) {
1949                printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
1950                err = -ENOMEM;
1951                goto fail2;
1952        }
1953
1954        /* find a route */
1955        rt = find_route(h->rdev.t3cdev_p,
1956                        cm_id->local_addr.sin_addr.s_addr,
1957                        cm_id->remote_addr.sin_addr.s_addr,
1958                        cm_id->local_addr.sin_port,
1959                        cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
1960        if (!rt) {
1961                printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
1962                err = -EHOSTUNREACH;
1963                goto fail3;
1964        }
1965        ep->dst = &rt->u.dst;
1966
1967        /* get a l2t entry */
1968        ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
1969                             ep->dst->neighbour->dev);
1970        if (!ep->l2t) {
1971                printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
1972                err = -ENOMEM;
1973                goto fail4;
1974        }
1975
1976        state_set(&ep->com, CONNECTING);
1977        ep->tos = IPTOS_LOWDELAY;
1978        ep->com.local_addr = cm_id->local_addr;
1979        ep->com.remote_addr = cm_id->remote_addr;
1980
1981        /* send connect request to rnic */
1982        err = send_connect(ep);
1983        if (!err)
1984                goto out;
1985
1986        l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t);
1987fail4:
1988        dst_release(ep->dst);
1989fail3:
1990        cxgb3_free_atid(ep->com.tdev, ep->atid);
1991fail2:
1992        cm_id->rem_ref(cm_id);
1993        put_ep(&ep->com);
1994out:
1995        return err;
1996}
1997
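/*
 * Start listening: allocate a listening endpoint and a server TID, start
 * the hardware listener and wait for its PASS_OPEN_RPL before reporting
 * the result to the ULP.
 */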
1998int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
1999{
2000        int err = 0;
2001        struct iwch_dev *h = to_iwch_dev(cm_id->device);
2002        struct iwch_listen_ep *ep;
2003
2004
2005        might_sleep();
2006
2007        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2008        if (!ep) {
2009                printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2010                err = -ENOMEM;
2011                goto fail1;
2012        }
2013        PDBG("%s ep %p\n", __func__, ep);
2014        ep->com.tdev = h->rdev.t3cdev_p;
2015        cm_id->add_ref(cm_id);
2016        ep->com.cm_id = cm_id;
2017        ep->backlog = backlog;
2018        ep->com.local_addr = cm_id->local_addr;
2019
2020        /*
2021         * Allocate a server TID.
2022         */
2023        ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
2024        if (ep->stid == -1) {
2025                printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
2026                err = -ENOMEM;
2027                goto fail2;
2028        }
2029
2030        state_set(&ep->com, LISTEN);
2031        err = listen_start(ep);
2032        if (err)
2033                goto fail3;
2034
2035        /* wait for pass_open_rpl */
2036        wait_event(ep->com.waitq, ep->com.rpl_done);
2037        err = ep->com.rpl_err;
2038        if (!err) {
2039                cm_id->provider_data = ep;
2040                goto out;
2041        }
2042fail3:
2043        cxgb3_free_stid(ep->com.tdev, ep->stid);
2044fail2:
2045        cm_id->rem_ref(cm_id);
2046        put_ep(&ep->com);
2047fail1:
2048out:
2049        return err;
2050}
2051
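/*
 * Stop listening: shut down the hardware listener, wait for its close
 * reply and release the server TID.
 */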
2052int iwch_destroy_listen(struct iw_cm_id *cm_id)
2053{
2054        int err;
2055        struct iwch_listen_ep *ep = to_listen_ep(cm_id);
2056
2057        PDBG("%s ep %p\n", __func__, ep);
2058
2059        might_sleep();
2060        state_set(&ep->com, DEAD);
2061        ep->com.rpl_done = 0;
2062        ep->com.rpl_err = 0;
2063        err = listen_stop(ep);
2064        if (err)
2065                goto done;
2066        wait_event(ep->com.waitq, ep->com.rpl_done);
2067        cxgb3_free_stid(ep->com.tdev, ep->stid);
2068done:
2069        err = ep->com.rpl_err;
2070        cm_id->rem_ref(cm_id);
2071        put_ep(&ep->com);
2072        return err;
2073}
2074
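/*
 * Begin tearing down a connection, either abruptly (send an ABORT_REQ) or
 * gracefully (send a half-close and arm the endpoint timer).  A fatal
 * adapter error skips the wire traffic and just releases the endpoint.
 */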
2075int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
2076{
2077        int ret = 0;
2078        unsigned long flags;
2079        int close = 0;
2080        int fatal = 0;
2081        struct t3cdev *tdev;
2082        struct cxio_rdev *rdev;
2083
2084        spin_lock_irqsave(&ep->com.lock, flags);
2085
2086        PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
2087             states[ep->com.state], abrupt);
2088
2089        tdev = (struct t3cdev *)ep->com.tdev;
2090        rdev = (struct cxio_rdev *)tdev->ulp;
2091        if (cxio_fatal_error(rdev)) {
2092                fatal = 1;
2093                close_complete_upcall(ep);
2094                ep->com.state = DEAD;
2095        }
2096        switch (ep->com.state) {
2097        case MPA_REQ_WAIT:
2098        case MPA_REQ_SENT:
2099        case MPA_REQ_RCVD:
2100        case MPA_REP_SENT:
2101        case FPDU_MODE:
2102                close = 1;
2103                if (abrupt)
2104                        ep->com.state = ABORTING;
2105                else {
2106                        ep->com.state = CLOSING;
2107                        start_ep_timer(ep);
2108                }
2109                set_bit(CLOSE_SENT, &ep->com.flags);
2110                break;
2111        case CLOSING:
2112                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2113                        close = 1;
2114                        if (abrupt) {
2115                                stop_ep_timer(ep);
2116                                ep->com.state = ABORTING;
2117                        } else
2118                                ep->com.state = MORIBUND;
2119                }
2120                break;
2121        case MORIBUND:
2122        case ABORTING:
2123        case DEAD:
2124                PDBG("%s ignoring disconnect ep %p state %u\n",
2125                     __func__, ep, ep->com.state);
2126                break;
2127        default:
2128                BUG();
2129                break;
2130        }
2131
2132        spin_unlock_irqrestore(&ep->com.lock, flags);
2133        if (close) {
2134                if (abrupt)
2135                        ret = send_abort(ep, NULL, gfp);
2136                else
2137                        ret = send_halfclose(ep, gfp);
2138                if (ret)
2139                        fatal = 1;
2140        }
2141        if (fatal)
2142                release_ep_resources(ep);
2143        return ret;
2144}
2145
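/*
 * A routing change redirected this endpoint: if it was using the old dst,
 * swap in the new dst and L2T entry and return 1, otherwise return 0.
 */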
2146int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2147                     struct l2t_entry *l2t)
2148{
2149        struct iwch_ep *ep = ctx;
2150
2151        if (ep->dst != old)
2152                return 0;
2153
2154        PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2155             l2t);
2156        dst_hold(new);
2157        l2t_release(L2DATA(ep->com.tdev), ep->l2t);
2158        ep->l2t = l2t;
2159        dst_release(old);
2160        ep->dst = new;
2161        return 1;
2162}
2163
2164/*
2165 * All CM events are handled on a work queue so they run in a safe, sleepable context.
2166 */
2167static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2168{
2169        struct iwch_ep_common *epc = ctx;
2170
2171        get_ep(epc);
2172
2173        /*
2174         * Save ctx and tdev in the skb->cb area.
2175         */
2176        *((void **) skb->cb) = ctx;
2177        *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
2178
2179        /*
2180         * Queue the skb and schedule the worker thread.
2181         */
2182        skb_queue_tail(&rxq, skb);
2183        queue_work(workq, &skb_work);
2184        return 0;
2185}
2186
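/*
 * SET_TCB_RPL completions need no work beyond warning about an unexpected
 * error status.
 */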
2187static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2188{
2189        struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
2190
2191        if (rpl->status != CPL_ERR_NONE) {
2192                printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
2193                       "for tid %u\n", rpl->status, GET_TID(rpl));
2194        }
2195        return CPL_RET_BUF_DONE;
2196}
2197
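/*
 * Module init for the CM: create the single-threaded work queue and wire
 * up the CPL handlers.  Everything except SET_TCB_RPL is deferred to the
 * work queue via sched().
 */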
2198int __init iwch_cm_init(void)
2199{
2200        skb_queue_head_init(&rxq);
2201
2202        workq = create_singlethread_workqueue("iw_cxgb3");
2203        if (!workq)
2204                return -ENOMEM;
2205
2206        /*
2207         * All upcalls from the T3 Core go to sched() to
2208         * schedule the processing on a work queue.
2209         */
2210        t3c_handlers[CPL_ACT_ESTABLISH] = sched;
2211        t3c_handlers[CPL_ACT_OPEN_RPL] = sched;
2212        t3c_handlers[CPL_RX_DATA] = sched;
2213        t3c_handlers[CPL_TX_DMA_ACK] = sched;
2214        t3c_handlers[CPL_ABORT_RPL_RSS] = sched;
2215        t3c_handlers[CPL_ABORT_RPL] = sched;
2216        t3c_handlers[CPL_PASS_OPEN_RPL] = sched;
2217        t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched;
2218        t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched;
2219        t3c_handlers[CPL_PASS_ESTABLISH] = sched;
2220        t3c_handlers[CPL_PEER_CLOSE] = sched;
2221        t3c_handlers[CPL_CLOSE_CON_RPL] = sched;
2222        t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
2223        t3c_handlers[CPL_RDMA_TERMINATE] = sched;
2224        t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
2225        t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl;
2226
2227        /*
2228         * These are the real handlers that are called from a
2229         * work queue.
2230         */
2231        work_handlers[CPL_ACT_ESTABLISH] = act_establish;
2232        work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl;
2233        work_handlers[CPL_RX_DATA] = rx_data;
2234        work_handlers[CPL_TX_DMA_ACK] = tx_ack;
2235        work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl;
2236        work_handlers[CPL_ABORT_RPL] = abort_rpl;
2237        work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl;
2238        work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl;
2239        work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req;
2240        work_handlers[CPL_PASS_ESTABLISH] = pass_establish;
2241        work_handlers[CPL_PEER_CLOSE] = peer_close;
2242        work_handlers[CPL_ABORT_REQ_RSS] = peer_abort;
2243        work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl;
2244        work_handlers[CPL_RDMA_TERMINATE] = terminate;
2245        work_handlers[CPL_RDMA_EC_STATUS] = ec_status;
2246        return 0;
2247}
2248
2249void __exit iwch_cm_term(void)
2250{
2251        flush_workqueue(workq);
2252        destroy_workqueue(workq);
2253}
2254