linux/drivers/infiniband/hw/ehca/ehca_irq.c
/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *           Joachim Fenkes <fenkes@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/slab.h>
#include <linux/smpboot.h>

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"

#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)

#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)

#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)

static void queue_comp_task(struct ehca_cq *__cq);

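/* global pool of per-CPU completion tasks, used only when ehca_scaling_code is set */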
static struct ehca_comp_pool *pool;

static inline void comp_event_callback(struct ehca_cq *cq)
{
        if (!cq->ib_cq.comp_handler)
                return;

        spin_lock(&cq->cb_lock);
        cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
        spin_unlock(&cq->cb_lock);

        return;
}

static void print_error_data(struct ehca_shca *shca, void *data,
                             u64 *rblock, int length)
{
        u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
        u64 resource = rblock[1];

        switch (type) {
        case 0x1: /* Queue Pair */
        {
                struct ehca_qp *qp = (struct ehca_qp *)data;

                /* only print error data if AER is set */
                if (rblock[6] == 0)
                        return;

                ehca_err(&shca->ib_device,
                         "QP 0x%x (resource=%llx) has errors.",
                         qp->ib_qp.qp_num, resource);
                break;
        }
        case 0x4: /* Completion Queue */
        {
                struct ehca_cq *cq = (struct ehca_cq *)data;

                ehca_err(&shca->ib_device,
                         "CQ 0x%x (resource=%llx) has errors.",
                         cq->cq_number, resource);
                break;
        }
        default:
                ehca_err(&shca->ib_device,
                         "Unknown error type: %llx on %s.",
                         type, shca->ib_device.name);
                break;
        }

        ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
        ehca_err(&shca->ib_device, "EHCA ----- error data begin "
                 "---------------------------------------------------");
        ehca_dmp(rblock, length, "resource=%llx", resource);
        ehca_err(&shca->ib_device, "EHCA ----- error data end "
                 "----------------------------------------------------");

        return;
}

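/*
 * Fetch the firmware error data block for the given resource and dump it;
 * data points to the ehca_qp or ehca_cq the event refers to.
 */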
int ehca_error_data(struct ehca_shca *shca, void *data,
                    u64 resource)
{

        unsigned long ret;
        u64 *rblock;
        unsigned long block_count;

        rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
                ret = -ENOMEM;
                goto error_data1;
        }

        /* rblock must be 4K aligned and should be 4K large */
        ret = hipz_h_error_data(shca->ipz_hca_handle,
                                resource,
                                rblock,
                                &block_count);

        if (ret == H_R_STATE)
                ehca_err(&shca->ib_device,
                         "No error data is available: %llx.", resource);
        else if (ret == H_SUCCESS) {
                int length;

                length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

                if (length > EHCA_PAGESIZE)
                        length = EHCA_PAGESIZE;

                print_error_data(shca, data, rblock, length);
        } else
                ehca_err(&shca->ib_device,
                         "Error data could not be fetched: %llx", resource);

        ehca_free_fw_ctrlblock(rblock);

error_data1:
        return ret;

}

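/* deliver an affiliated asynchronous event to the QP's or SRQ's event handler */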
static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
                              enum ib_event_type event_type)
{
        struct ib_event event;

        /* PATH_MIG without the QP ever having been armed is false alarm */
        if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
                return;

        event.device = &shca->ib_device;
        event.event = event_type;

        if (qp->ext_type == EQPT_SRQ) {
                if (!qp->ib_srq.event_handler)
                        return;

                event.element.srq = &qp->ib_srq;
                qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
        } else {
                if (!qp->ib_qp.event_handler)
                        return;

                event.element.qp = &qp->ib_qp;
                qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
        }
}

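/*
 * Look up the QP by its EQE token and hold it via nr_events so it cannot
 * go away while the event is dispatched.
 */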
static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
                              enum ib_event_type event_type, int fatal)
{
        struct ehca_qp *qp;
        u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);

        read_lock(&ehca_qp_idr_lock);
        qp = idr_find(&ehca_qp_idr, token);
        if (qp)
                atomic_inc(&qp->nr_events);
        read_unlock(&ehca_qp_idr_lock);

        if (!qp)
                return;

        if (fatal)
                ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);

        dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
                          IB_EVENT_SRQ_ERR : event_type);

        /*
         * eHCA only processes one WQE at a time for SRQ base QPs,
         * so the last WQE has been processed as soon as the QP enters
         * error state.
         */
        if (fatal && qp->ext_type == EQPT_SRQBASE)
                dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);

        if (atomic_dec_and_test(&qp->nr_events))
                wake_up(&qp->wait_completion);
        return;
}

static void cq_event_callback(struct ehca_shca *shca,
                              u64 eqe)
{
        struct ehca_cq *cq;
        u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);

        read_lock(&ehca_cq_idr_lock);
        cq = idr_find(&ehca_cq_idr, token);
        if (cq)
                atomic_inc(&cq->nr_events);
        read_unlock(&ehca_cq_idr_lock);

        if (!cq)
                return;

        ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);

        if (atomic_dec_and_test(&cq->nr_events))
                wake_up(&cq->wait_completion);

        return;
}

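/* decode the event identifier of an affiliated EQE and dispatch it */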
static void parse_identifier(struct ehca_shca *shca, u64 eqe)
{
        u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);

        switch (identifier) {
        case 0x02: /* path migrated */
                qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
                break;
        case 0x03: /* communication established */
                qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
                break;
        case 0x04: /* send queue drained */
                qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
                break;
        case 0x05: /* QP error */
        case 0x06: /* QP error */
                qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
                break;
        case 0x07: /* CQ error */
        case 0x08: /* CQ error */
                cq_event_callback(shca, eqe);
                break;
        case 0x09: /* MRMWPTE error */
                ehca_err(&shca->ib_device, "MRMWPTE error.");
                break;
        case 0x0A: /* port event */
                ehca_err(&shca->ib_device, "Port event.");
                break;
        case 0x0B: /* MR access error */
                ehca_err(&shca->ib_device, "MR access error.");
                break;
        case 0x0C: /* EQ error */
                ehca_err(&shca->ib_device, "EQ error.");
                break;
        case 0x0D: /* P/Q_Key mismatch */
                ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
                break;
        case 0x10: /* sampling complete */
                ehca_err(&shca->ib_device, "Sampling complete.");
                break;
        case 0x11: /* unaffiliated access error */
                ehca_err(&shca->ib_device, "Unaffiliated access error.");
                break;
        case 0x12: /* path migrating */
                ehca_err(&shca->ib_device, "Path migrating.");
                break;
        case 0x13: /* interface trace stopped */
                ehca_err(&shca->ib_device, "Interface trace stopped.");
                break;
        case 0x14: /* first error capture info available */
                ehca_info(&shca->ib_device, "First error capture available");
                break;
        case 0x15: /* SRQ limit reached */
                qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
                break;
        default:
                ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
                         identifier, shca->ib_device.name);
                break;
        }

        return;
}

static void dispatch_port_event(struct ehca_shca *shca, int port_num,
                                enum ib_event_type type, const char *msg)
{
        struct ib_event event;

        ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
        event.device = &shca->ib_device;
        event.event = type;
        event.element.port_num = port_num;
        ib_dispatch_event(&event);
}

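/*
 * Compare the current SMA attributes of a port with the saved ones and
 * raise SM_CHANGE, LID_CHANGE and PKEY_CHANGE events as needed.
 */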
static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
{
        struct ehca_sma_attr  new_attr;
        struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;

        ehca_query_sma_attr(shca, port_num, &new_attr);

        if (new_attr.sm_sl  != old_attr->sm_sl ||
            new_attr.sm_lid != old_attr->sm_lid)
                dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
                                    "SM changed");

        if (new_attr.lid != old_attr->lid ||
            new_attr.lmc != old_attr->lmc)
                dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
                                    "LID changed");

        if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
            memcmp(new_attr.pkeys, old_attr->pkeys,
                   sizeof(u16) * new_attr.pkey_tbl_len))
                dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
                                    "P_Key changed");

        *old_attr = new_attr;
}

/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
static int replay_modify_qp(struct ehca_sport *sport)
{
        int aqp1_destroyed;
        unsigned long flags;

        spin_lock_irqsave(&sport->mod_sqp_lock, flags);

        aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];

        if (sport->ibqp_sqp[IB_QPT_SMI])
                ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
        if (!aqp1_destroyed)
                ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);

        spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);

        return aqp1_destroyed;
}

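/*
 * Decode and handle a notification event queue entry: port availability,
 * port configuration change, adapter malfunction, trace and util events.
 */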
static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
        u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
        u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
        u8 spec_event;
        struct ehca_sport *sport = &shca->sport[port - 1];

        switch (ec) {
        case 0x30: /* port availability change */
                if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
                        /* only replay modify_qp calls in autodetect mode;
                         * if AQP1 was destroyed, the port is already down
                         * again and we can drop the event.
                         */
                        if (ehca_nr_ports < 0)
                                if (replay_modify_qp(sport))
                                        break;

                        sport->port_state = IB_PORT_ACTIVE;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
                                            "is active");
                        ehca_query_sma_attr(shca, port, &sport->saved_attr);
                } else {
                        sport->port_state = IB_PORT_DOWN;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
                                            "is inactive");
                }
                break;
        case 0x31:
                /* port configuration change
                 * disruptive change is caused by
                 * LID, PKEY or SM change
                 */
                if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
                        ehca_warn(&shca->ib_device, "disruptive port "
                                  "%d configuration change", port);

                        sport->port_state = IB_PORT_DOWN;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
                                            "is inactive");

                        sport->port_state = IB_PORT_ACTIVE;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
                                            "is active");
                        ehca_query_sma_attr(shca, port,
                                            &sport->saved_attr);
                } else
                        notify_port_conf_change(shca, port);
                break;
        case 0x32: /* adapter malfunction */
                ehca_err(&shca->ib_device, "Adapter malfunction.");
                break;
        case 0x33:  /* trace stopped */
                ehca_err(&shca->ib_device, "Trace stopped.");
                break;
        case 0x34: /* util async event */
                spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
                if (spec_event == 0x80) /* client reregister required */
                        dispatch_port_event(shca, port,
                                            IB_EVENT_CLIENT_REREGISTER,
                                            "client reregister req.");
                else
                        ehca_warn(&shca->ib_device, "Unknown util async "
                                  "event %x on port %x", spec_event, port);
                break;
        default:
                ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
                         ec, shca->ib_device.name);
                break;
        }

        return;
}

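/* clear the CQ's event pending register (CQx_EP) so new completions raise further EQ entries */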
static inline void reset_eq_pending(struct ehca_cq *cq)
{
        u64 CQx_EP;
        struct h_galpa gal = cq->galpas.kernel;

        hipz_galpa_store_cq(gal, cqx_ep, 0x0);
        CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));

        return;
}

irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
{
        struct ehca_shca *shca = (struct ehca_shca*)dev_id;

        tasklet_hi_schedule(&shca->neq.interrupt_task);

        return IRQ_HANDLED;
}

void ehca_tasklet_neq(unsigned long data)
{
        struct ehca_shca *shca = (struct ehca_shca*)data;
        struct ehca_eqe *eqe;
        u64 ret;

        eqe = ehca_poll_eq(shca, &shca->neq);

        while (eqe) {
                if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
                        parse_ec(shca, eqe->entry);

                eqe = ehca_poll_eq(shca, &shca->neq);
        }

        ret = hipz_h_reset_event(shca->ipz_hca_handle,
                                 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);

        if (ret != H_SUCCESS)
                ehca_err(&shca->ib_device, "Can't clear notification events.");

        return;
}

irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
{
        struct ehca_shca *shca = (struct ehca_shca*)dev_id;

        tasklet_hi_schedule(&shca->eq.interrupt_task);

        return IRQ_HANDLED;
}


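/*
 * Handle a single EQE: completion events are resolved to their CQ via the
 * token and dispatched (directly or through the comp pool), everything else
 * goes to parse_identifier().
 */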
static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
        u64 eqe_value;
        u32 token;
        struct ehca_cq *cq;

        eqe_value = eqe->entry;
        ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
        if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
                ehca_dbg(&shca->ib_device, "Got completion event");
                token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
                read_lock(&ehca_cq_idr_lock);
                cq = idr_find(&ehca_cq_idr, token);
                if (cq)
                        atomic_inc(&cq->nr_events);
                read_unlock(&ehca_cq_idr_lock);
                if (cq == NULL) {
                        ehca_err(&shca->ib_device,
                                 "Invalid eqe for non-existing cq token=%x",
                                 token);
                        return;
                }
                reset_eq_pending(cq);
                if (ehca_scaling_code)
                        queue_comp_task(cq);
                else {
                        comp_event_callback(cq);
                        if (atomic_dec_and_test(&cq->nr_events))
                                wake_up(&cq->wait_completion);
                }
        } else {
                ehca_dbg(&shca->ib_device, "Got non completion event");
                parse_identifier(shca, eqe_value);
        }
}

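/*
 * Drain the event queue: on the irq path, wait for the interrupt source to
 * settle, then cache all pending EQEs (pinning their CQs), reset the CQs'
 * pending bits and finally run the completion handlers.
 */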
void ehca_process_eq(struct ehca_shca *shca, int is_irq)
{
        struct ehca_eq *eq = &shca->eq;
        struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
        u64 eqe_value, ret;
        int eqe_cnt, i;
        int eq_empty = 0;

        spin_lock(&eq->irq_spinlock);
        if (is_irq) {
                const int max_query_cnt = 100;
                int query_cnt = 0;
                int int_state = 1;
                do {
                        int_state = hipz_h_query_int_state(
                                shca->ipz_hca_handle, eq->ist);
                        query_cnt++;
                        iosync();
                } while (int_state && query_cnt < max_query_cnt);
                if (unlikely((query_cnt == max_query_cnt)))
                        ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
                                 int_state, query_cnt);
        }

        /* read out all eqes */
        eqe_cnt = 0;
        do {
                u32 token;
                eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
                if (!eqe_cache[eqe_cnt].eqe)
                        break;
                eqe_value = eqe_cache[eqe_cnt].eqe->entry;
                if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
                        token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
                        read_lock(&ehca_cq_idr_lock);
                        eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
                        if (eqe_cache[eqe_cnt].cq)
                                atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
                        read_unlock(&ehca_cq_idr_lock);
                        if (!eqe_cache[eqe_cnt].cq) {
                                ehca_err(&shca->ib_device,
                                         "Invalid eqe for non-existing cq "
                                         "token=%x", token);
                                continue;
                        }
                } else
                        eqe_cache[eqe_cnt].cq = NULL;
                eqe_cnt++;
        } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
        if (!eqe_cnt) {
                if (is_irq)
                        ehca_dbg(&shca->ib_device,
                                 "No eqe found for irq event");
                goto unlock_irq_spinlock;
        } else if (!is_irq) {
                ret = hipz_h_eoi(eq->ist);
                if (ret != H_SUCCESS)
                        ehca_err(&shca->ib_device,
                                 "bad return code EOI -rc = %lld\n", ret);
                ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
        }
        if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
                ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
        /* enable irq for new packets */
        for (i = 0; i < eqe_cnt; i++) {
                if (eq->eqe_cache[i].cq)
                        reset_eq_pending(eq->eqe_cache[i].cq);
        }
        /* check eq */
        spin_lock(&eq->spinlock);
        eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
        spin_unlock(&eq->spinlock);
        /* call completion handler for cached eqes */
        for (i = 0; i < eqe_cnt; i++)
                if (eq->eqe_cache[i].cq) {
                        if (ehca_scaling_code)
                                queue_comp_task(eq->eqe_cache[i].cq);
                        else {
                                struct ehca_cq *cq = eq->eqe_cache[i].cq;
                                comp_event_callback(cq);
                                if (atomic_dec_and_test(&cq->nr_events))
                                        wake_up(&cq->wait_completion);
                        }
                } else {
                        ehca_dbg(&shca->ib_device, "Got non completion event");
                        parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
                }
        /* poll eq if not empty */
        if (eq_empty)
                goto unlock_irq_spinlock;
        do {
                struct ehca_eqe *eqe;
                eqe = ehca_poll_eq(shca, &shca->eq);
                if (!eqe)
                        break;
                process_eqe(shca, eqe);
        } while (1);

unlock_irq_spinlock:
        spin_unlock(&eq->irq_spinlock);
}

void ehca_tasklet_eq(unsigned long data)
{
        ehca_process_eq((struct ehca_shca*)data, 1);
}

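/* round-robin selection of the next online CPU whose comp task is active */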
static int find_next_online_cpu(struct ehca_comp_pool *pool)
{
        int cpu;
        unsigned long flags;

        WARN_ON_ONCE(!in_interrupt());
        if (ehca_debug_level >= 3)
                ehca_dmp(cpu_online_mask, cpumask_size(), "");

        spin_lock_irqsave(&pool->last_cpu_lock, flags);
        do {
                cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
                if (cpu >= nr_cpu_ids)
                        cpu = cpumask_first(cpu_online_mask);
                pool->last_cpu = cpu;
        } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
        spin_unlock_irqrestore(&pool->last_cpu_lock, flags);

        return cpu;
}

static void __queue_comp_task(struct ehca_cq *__cq,
                              struct ehca_cpu_comp_task *cct,
                              struct task_struct *thread)
{
        unsigned long flags;

        spin_lock_irqsave(&cct->task_lock, flags);
        spin_lock(&__cq->task_lock);

        if (__cq->nr_callbacks == 0) {
                __cq->nr_callbacks++;
                list_add_tail(&__cq->entry, &cct->cq_list);
                cct->cq_jobs++;
                wake_up_process(thread);
        } else
                __cq->nr_callbacks++;

        spin_unlock(&__cq->task_lock);
        spin_unlock_irqrestore(&cct->task_lock, flags);
}

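/*
 * Hand a CQ to a per-CPU completion thread; if the chosen CPU already has
 * jobs queued, pick the next one before queueing.
 */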
static void queue_comp_task(struct ehca_cq *__cq)
{
        int cpu_id;
        struct ehca_cpu_comp_task *cct;
        struct task_struct *thread;
        int cq_jobs;
        unsigned long flags;

        cpu_id = find_next_online_cpu(pool);
        BUG_ON(!cpu_online(cpu_id));

        cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
        thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
        BUG_ON(!cct || !thread);

        spin_lock_irqsave(&cct->task_lock, flags);
        cq_jobs = cct->cq_jobs;
        spin_unlock_irqrestore(&cct->task_lock, flags);
        if (cq_jobs > 0) {
                cpu_id = find_next_online_cpu(pool);
                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
                thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
                BUG_ON(!cct || !thread);
        }
        __queue_comp_task(__cq, cct, thread);
}

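/*
 * Process all CQs queued on this CPU; called with cct->task_lock held,
 * which is dropped around the completion callback.
 */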
static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
        struct ehca_cq *cq;

        while (!list_empty(&cct->cq_list)) {
                cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
                spin_unlock_irq(&cct->task_lock);

                comp_event_callback(cq);
                if (atomic_dec_and_test(&cq->nr_events))
                        wake_up(&cq->wait_completion);

                spin_lock_irq(&cct->task_lock);
                spin_lock(&cq->task_lock);
                cq->nr_callbacks--;
                if (!cq->nr_callbacks) {
                        list_del_init(cct->cq_list.next);
                        cct->cq_jobs--;
                }
                spin_unlock(&cq->task_lock);
        }
}

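/*
 * smpboot park callback: deactivate this CPU's comp task and move its
 * queued CQs to another online CPU.
 */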
static void comp_task_park(unsigned int cpu)
{
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
        struct ehca_cpu_comp_task *target;
        struct task_struct *thread;
        struct ehca_cq *cq, *tmp;
        LIST_HEAD(list);

        spin_lock_irq(&cct->task_lock);
        cct->cq_jobs = 0;
        cct->active = 0;
        list_splice_init(&cct->cq_list, &list);
        spin_unlock_irq(&cct->task_lock);

        cpu = find_next_online_cpu(pool);
        target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
        thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
        spin_lock_irq(&target->task_lock);
        list_for_each_entry_safe(cq, tmp, &list, entry) {
                list_del(&cq->entry);
                __queue_comp_task(cq, target, thread);
        }
        spin_unlock_irq(&target->task_lock);
}

static void comp_task_stop(unsigned int cpu, bool online)
{
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

        spin_lock_irq(&cct->task_lock);
        cct->cq_jobs = 0;
        cct->active = 0;
        WARN_ON(!list_empty(&cct->cq_list));
        spin_unlock_irq(&cct->task_lock);
}

static int comp_task_should_run(unsigned int cpu)
{
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

        return cct->cq_jobs;
}

static void comp_task(unsigned int cpu)
{
        struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
        int cql_empty;

        spin_lock_irq(&cct->task_lock);
        cql_empty = list_empty(&cct->cq_list);
        if (!cql_empty) {
                __set_current_state(TASK_RUNNING);
                run_comp_task(cct);
        }
        spin_unlock_irq(&cct->task_lock);
}

static struct smp_hotplug_thread comp_pool_threads = {
        .thread_should_run      = comp_task_should_run,
        .thread_fn              = comp_task,
        .thread_comm            = "ehca_comp/%u",
        .cleanup                = comp_task_stop,
        .park                   = comp_task_park,
};

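/*
 * Allocate the per-CPU comp task pool and register the smpboot threads;
 * does nothing unless ehca_scaling_code is set.
 */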
int ehca_create_comp_pool(void)
{
        int cpu, ret = -ENOMEM;

        if (!ehca_scaling_code)
                return 0;

        pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
        if (pool == NULL)
                return -ENOMEM;

        spin_lock_init(&pool->last_cpu_lock);
        pool->last_cpu = cpumask_any(cpu_online_mask);

        pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
        if (!pool->cpu_comp_tasks)
                goto out_pool;

        pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
        if (!pool->cpu_comp_threads)
                goto out_tasks;

        for_each_present_cpu(cpu) {
                struct ehca_cpu_comp_task *cct;

                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
                spin_lock_init(&cct->task_lock);
                INIT_LIST_HEAD(&cct->cq_list);
        }

        comp_pool_threads.store = pool->cpu_comp_threads;
        ret = smpboot_register_percpu_thread(&comp_pool_threads);
        if (ret)
                goto out_threads;

        pr_info("eHCA scaling code enabled\n");
        return ret;

out_threads:
        free_percpu(pool->cpu_comp_threads);
out_tasks:
        free_percpu(pool->cpu_comp_tasks);
out_pool:
        kfree(pool);
        return ret;
}

void ehca_destroy_comp_pool(void)
{
        if (!ehca_scaling_code)
                return;

        smpboot_unregister_percpu_thread(&comp_pool_threads);

        free_percpu(pool->cpu_comp_threads);
        free_percpu(pool->cpu_comp_tasks);
        kfree(pool);
}