linux/drivers/dma/ioat/dma_v3.c
   1/*
   2 * This file is provided under a dual BSD/GPLv2 license.  When using or
   3 * redistributing this file, you may do so under either license.
   4 *
   5 * GPL LICENSE SUMMARY
   6 *
   7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
   8 *
   9 * This program is free software; you can redistribute it and/or modify it
  10 * under the terms and conditions of the GNU General Public License,
  11 * version 2, as published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but WITHOUT
  14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  16 * more details.
  17 *
  18 * You should have received a copy of the GNU General Public License along with
  19 * this program; if not, write to the Free Software Foundation, Inc.,
  20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  21 *
  22 * The full GNU General Public License is included in this distribution in
  23 * the file called "COPYING".
  24 *
  25 * BSD LICENSE
  26 *
  27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
  28 *
  29 * Redistribution and use in source and binary forms, with or without
  30 * modification, are permitted provided that the following conditions are met:
  31 *
  32 *   * Redistributions of source code must retain the above copyright
  33 *     notice, this list of conditions and the following disclaimer.
  34 *   * Redistributions in binary form must reproduce the above copyright
  35 *     notice, this list of conditions and the following disclaimer in
  36 *     the documentation and/or other materials provided with the
  37 *     distribution.
  38 *   * Neither the name of Intel Corporation nor the names of its
  39 *     contributors may be used to endorse or promote products derived
  40 *     from this software without specific prior written permission.
  41 *
  42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  52 * POSSIBILITY OF SUCH DAMAGE.
  53 */
  54
  55/*
  56 * Support routines for v3+ hardware
  57 */
  58
  59#include <linux/pci.h>
  60#include <linux/dmaengine.h>
  61#include <linux/dma-mapping.h>
  62#include "registers.h"
  63#include "hw.h"
  64#include "dma.h"
  65#include "dma_v2.h"
  66
  67/* ioat hardware assumes at least two sources for raid operations */
  68#define src_cnt_to_sw(x) ((x) + 2)
  69#define src_cnt_to_hw(x) ((x) - 2)
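/* e.g. a five source xor is encoded in the descriptor's src_cnt field as 3 */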
  70
  71/* provide a lookup table for setting the source address in the base or
  72 * extended descriptor of an xor or pq descriptor
  73 */
   74static const u8 xor_idx_to_desc __read_mostly = 0xe0;
  75static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
  76static const u8 pq_idx_to_desc __read_mostly = 0xf8;
  77static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
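/* bit i of {xor,pq}_idx_to_desc selects whether source i lives in the base (0)
 * or the extended (1) descriptor; {xor,pq}_idx_to_field gives the u64 field
 * index of that source within the selected descriptor
 */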
  78
  79static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
  80{
  81        struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
  82
  83        return raw->field[xor_idx_to_field[idx]];
  84}
  85
  86static void xor_set_src(struct ioat_raw_descriptor *descs[2],
  87                        dma_addr_t addr, u32 offset, int idx)
  88{
  89        struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
  90
  91        raw->field[xor_idx_to_field[idx]] = addr + offset;
  92}
  93
  94static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
  95{
  96        struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
  97
  98        return raw->field[pq_idx_to_field[idx]];
  99}
 100
 101static void pq_set_src(struct ioat_raw_descriptor *descs[2],
 102                       dma_addr_t addr, u32 offset, u8 coef, int idx)
 103{
 104        struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
 105        struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
 106
 107        raw->field[pq_idx_to_field[idx]] = addr + offset;
 108        pq->coef[idx] = coef;
 109}
 110
 111static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 112                            struct ioat_ring_ent *desc, int idx)
 113{
 114        struct ioat_chan_common *chan = &ioat->base;
 115        struct pci_dev *pdev = chan->device->pdev;
 116        size_t len = desc->len;
 117        size_t offset = len - desc->hw->size;
 118        struct dma_async_tx_descriptor *tx = &desc->txd;
 119        enum dma_ctrl_flags flags = tx->flags;
 120
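        /* descriptors record the addresses of the final chunk of a multi-
         * descriptor transfer, so subtract 'offset' to rewind to the start
         * of the original mapping before unmapping
         */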
 121        switch (desc->hw->ctl_f.op) {
 122        case IOAT_OP_COPY:
 123                if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
 124                        ioat_dma_unmap(chan, flags, len, desc->hw);
 125                break;
 126        case IOAT_OP_FILL: {
 127                struct ioat_fill_descriptor *hw = desc->fill;
 128
 129                if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
 130                        ioat_unmap(pdev, hw->dst_addr - offset, len,
 131                                   PCI_DMA_FROMDEVICE, flags, 1);
 132                break;
 133        }
 134        case IOAT_OP_XOR_VAL:
 135        case IOAT_OP_XOR: {
 136                struct ioat_xor_descriptor *xor = desc->xor;
 137                struct ioat_ring_ent *ext;
 138                struct ioat_xor_ext_descriptor *xor_ex = NULL;
 139                int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
 140                struct ioat_raw_descriptor *descs[2];
 141                int i;
 142
 143                if (src_cnt > 5) {
 144                        ext = ioat2_get_ring_ent(ioat, idx + 1);
 145                        xor_ex = ext->xor_ex;
 146                }
 147
 148                if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 149                        descs[0] = (struct ioat_raw_descriptor *) xor;
 150                        descs[1] = (struct ioat_raw_descriptor *) xor_ex;
 151                        for (i = 0; i < src_cnt; i++) {
 152                                dma_addr_t src = xor_get_src(descs, i);
 153
 154                                ioat_unmap(pdev, src - offset, len,
 155                                           PCI_DMA_TODEVICE, flags, 0);
 156                        }
 157
 158                        /* dest is a source in xor validate operations */
 159                        if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
 160                                ioat_unmap(pdev, xor->dst_addr - offset, len,
 161                                           PCI_DMA_TODEVICE, flags, 1);
 162                                break;
 163                        }
 164                }
 165
 166                if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
 167                        ioat_unmap(pdev, xor->dst_addr - offset, len,
 168                                   PCI_DMA_FROMDEVICE, flags, 1);
 169                break;
 170        }
 171        case IOAT_OP_PQ_VAL:
 172        case IOAT_OP_PQ: {
 173                struct ioat_pq_descriptor *pq = desc->pq;
 174                struct ioat_ring_ent *ext;
 175                struct ioat_pq_ext_descriptor *pq_ex = NULL;
 176                int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
 177                struct ioat_raw_descriptor *descs[2];
 178                int i;
 179
 180                if (src_cnt > 3) {
 181                        ext = ioat2_get_ring_ent(ioat, idx + 1);
 182                        pq_ex = ext->pq_ex;
 183                }
 184
 185                /* in the 'continue' case don't unmap the dests as sources */
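                /* the prep routine appended the old p/q results as implied
                 * sources (one for a p-disabled continue, three otherwise);
                 * they are unmapped as destinations below, not as sources
                 */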
 186                if (dmaf_p_disabled_continue(flags))
 187                        src_cnt--;
 188                else if (dmaf_continue(flags))
 189                        src_cnt -= 3;
 190
 191                if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 192                        descs[0] = (struct ioat_raw_descriptor *) pq;
 193                        descs[1] = (struct ioat_raw_descriptor *) pq_ex;
 194                        for (i = 0; i < src_cnt; i++) {
 195                                dma_addr_t src = pq_get_src(descs, i);
 196
 197                                ioat_unmap(pdev, src - offset, len,
 198                                           PCI_DMA_TODEVICE, flags, 0);
 199                        }
 200
 201                        /* the dests are sources in pq validate operations */
  202                        if (pq->ctl_f.op == IOAT_OP_PQ_VAL) {
 203                                if (!(flags & DMA_PREP_PQ_DISABLE_P))
 204                                        ioat_unmap(pdev, pq->p_addr - offset,
 205                                                   len, PCI_DMA_TODEVICE, flags, 0);
 206                                if (!(flags & DMA_PREP_PQ_DISABLE_Q))
 207                                        ioat_unmap(pdev, pq->q_addr - offset,
 208                                                   len, PCI_DMA_TODEVICE, flags, 0);
 209                                break;
 210                        }
 211                }
 212
 213                if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
 214                        if (!(flags & DMA_PREP_PQ_DISABLE_P))
 215                                ioat_unmap(pdev, pq->p_addr - offset, len,
 216                                           PCI_DMA_BIDIRECTIONAL, flags, 1);
 217                        if (!(flags & DMA_PREP_PQ_DISABLE_Q))
 218                                ioat_unmap(pdev, pq->q_addr - offset, len,
 219                                           PCI_DMA_BIDIRECTIONAL, flags, 1);
 220                }
 221                break;
 222        }
 223        default:
 224                dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
 225                        __func__, desc->hw->ctl_f.op);
 226        }
 227}
 228
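/* true when the operation also occupies a second (extended) descriptor slot in
 * the ring, so that __cleanup() knows to step over it
 */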
 229static bool desc_has_ext(struct ioat_ring_ent *desc)
 230{
 231        struct ioat_dma_descriptor *hw = desc->hw;
 232
 233        if (hw->ctl_f.op == IOAT_OP_XOR ||
 234            hw->ctl_f.op == IOAT_OP_XOR_VAL) {
 235                struct ioat_xor_descriptor *xor = desc->xor;
 236
 237                if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
 238                        return true;
 239        } else if (hw->ctl_f.op == IOAT_OP_PQ ||
 240                   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
 241                struct ioat_pq_descriptor *pq = desc->pq;
 242
 243                if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
 244                        return true;
 245        }
 246
 247        return false;
 248}
 249
 250/**
 251 * __cleanup - reclaim used descriptors
  252 * @ioat: channel (ring) to clean
 * @phys_complete: hardware-reported address of the last completed descriptor
 253 *
 254 * The difference from the dma_v2.c __cleanup() is that this routine
 255 * handles extended descriptors and dma-unmapping raid operations.
 256 */
 257static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 258{
 259        struct ioat_chan_common *chan = &ioat->base;
 260        struct ioat_ring_ent *desc;
 261        bool seen_current = false;
 262        u16 active;
 263        int i;
 264
 265        dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
 266                __func__, ioat->head, ioat->tail, ioat->issued);
 267
 268        active = ioat2_ring_active(ioat);
 269        for (i = 0; i < active && !seen_current; i++) {
 270                struct dma_async_tx_descriptor *tx;
 271
 272                prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
 273                desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
 274                dump_desc_dbg(ioat, desc);
 275                tx = &desc->txd;
 276                if (tx->cookie) {
 277                        chan->completed_cookie = tx->cookie;
 278                        ioat3_dma_unmap(ioat, desc, ioat->tail + i);
 279                        tx->cookie = 0;
 280                        if (tx->callback) {
 281                                tx->callback(tx->callback_param);
 282                                tx->callback = NULL;
 283                        }
 284                }
 285
 286                if (tx->phys == phys_complete)
 287                        seen_current = true;
 288
 289                /* skip extended descriptors */
 290                if (desc_has_ext(desc)) {
 291                        BUG_ON(i + 1 >= active);
 292                        i++;
 293                }
 294        }
 295        ioat->tail += i;
 296        BUG_ON(!seen_current); /* no active descs have written a completion? */
 297        chan->last_completion = phys_complete;
 298        if (ioat->head == ioat->tail) {
 299                dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
 300                        __func__);
 301                clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
 302                mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 303        }
 304}
 305
 306static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
 307{
 308        struct ioat_chan_common *chan = &ioat->base;
 309        unsigned long phys_complete;
 310
 311        prefetch(chan->completion);
 312
 313        if (!spin_trylock_bh(&chan->cleanup_lock))
 314                return;
 315
 316        if (!ioat_cleanup_preamble(chan, &phys_complete)) {
 317                spin_unlock_bh(&chan->cleanup_lock);
 318                return;
 319        }
 320
 321        if (!spin_trylock_bh(&ioat->ring_lock)) {
 322                spin_unlock_bh(&chan->cleanup_lock);
 323                return;
 324        }
 325
 326        __cleanup(ioat, phys_complete);
 327
 328        spin_unlock_bh(&ioat->ring_lock);
 329        spin_unlock_bh(&chan->cleanup_lock);
 330}
 331
 332static void ioat3_cleanup_tasklet(unsigned long data)
 333{
 334        struct ioat2_dma_chan *ioat = (void *) data;
 335
 336        ioat3_cleanup(ioat);
 337        writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
 338               ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 339}
 340
 341static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
 342{
 343        struct ioat_chan_common *chan = &ioat->base;
 344        unsigned long phys_complete;
 345        u32 status;
 346
 347        status = ioat_chansts(chan);
 348        if (is_ioat_active(status) || is_ioat_idle(status))
 349                ioat_suspend(chan);
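        /* wait for the suspend to take effect before touching the ring */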
 350        while (is_ioat_active(status) || is_ioat_idle(status)) {
 351                status = ioat_chansts(chan);
 352                cpu_relax();
 353        }
 354
 355        if (ioat_cleanup_preamble(chan, &phys_complete))
 356                __cleanup(ioat, phys_complete);
 357
 358        __ioat2_restart_chan(ioat);
 359}
 360
 361static void ioat3_timer_event(unsigned long data)
 362{
 363        struct ioat2_dma_chan *ioat = (void *) data;
 364        struct ioat_chan_common *chan = &ioat->base;
 365
 366        spin_lock_bh(&chan->cleanup_lock);
 367        if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
 368                unsigned long phys_complete;
 369                u64 status;
 370
 371                spin_lock_bh(&ioat->ring_lock);
 372                status = ioat_chansts(chan);
 373
 374                /* when halted due to errors check for channel
 375                 * programming errors before advancing the completion state
 376                 */
 377                if (is_ioat_halted(status)) {
 378                        u32 chanerr;
 379
 380                        chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
 381                        dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
 382                                __func__, chanerr);
 383                        BUG_ON(is_ioat_bug(chanerr));
 384                }
 385
 386                /* if we haven't made progress and we have already
 387                 * acknowledged a pending completion once, then be more
 388                 * forceful with a restart
 389                 */
 390                if (ioat_cleanup_preamble(chan, &phys_complete))
 391                        __cleanup(ioat, phys_complete);
 392                else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
 393                        ioat3_restart_channel(ioat);
 394                else {
 395                        set_bit(IOAT_COMPLETION_ACK, &chan->state);
 396                        mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 397                }
 398                spin_unlock_bh(&ioat->ring_lock);
 399        } else {
 400                u16 active;
 401
 402                /* if the ring is idle, empty, and oversized try to step
 403                 * down the size
 404                 */
 405                spin_lock_bh(&ioat->ring_lock);
 406                active = ioat2_ring_active(ioat);
 407                if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
 408                        reshape_ring(ioat, ioat->alloc_order-1);
 409                spin_unlock_bh(&ioat->ring_lock);
 410
 411                /* keep shrinking until we get back to our minimum
 412                 * default size
 413                 */
 414                if (ioat->alloc_order > ioat_get_alloc_order())
 415                        mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 416        }
 417        spin_unlock_bh(&chan->cleanup_lock);
 418}
 419
 420static enum dma_status
 421ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
 422                  dma_cookie_t *done, dma_cookie_t *used)
 423{
 424        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 425
 426        if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
 427                return DMA_SUCCESS;
 428
 429        ioat3_cleanup(ioat);
 430
 431        return ioat_is_complete(c, cookie, done, used);
 432}
 433
 434static struct dma_async_tx_descriptor *
 435ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
 436                       size_t len, unsigned long flags)
 437{
 438        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 439        struct ioat_ring_ent *desc;
 440        size_t total_len = len;
 441        struct ioat_fill_descriptor *fill;
 442        int num_descs;
  443        u64 src_data = (0x0101010101010101ULL) * (value & 0xff); /* replicate the fill byte across the 64-bit pattern */
 444        u16 idx;
 445        int i;
 446
 447        num_descs = ioat2_xferlen_to_descs(ioat, len);
 448        if (likely(num_descs) &&
 449            ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
 450                /* pass */;
 451        else
 452                return NULL;
 453        i = 0;
 454        do {
 455                size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 456
 457                desc = ioat2_get_ring_ent(ioat, idx + i);
 458                fill = desc->fill;
 459
 460                fill->size = xfer_size;
 461                fill->src_data = src_data;
 462                fill->dst_addr = dest;
 463                fill->ctl = 0;
 464                fill->ctl_f.op = IOAT_OP_FILL;
 465
 466                len -= xfer_size;
 467                dest += xfer_size;
 468                dump_desc_dbg(ioat, desc);
 469        } while (++i < num_descs);
 470
 471        desc->txd.flags = flags;
 472        desc->len = total_len;
 473        fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 474        fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 475        fill->ctl_f.compl_write = 1;
 476        dump_desc_dbg(ioat, desc);
 477
 478        /* we leave the channel locked to ensure in order submission */
 479        return &desc->txd;
 480}
 481
 482static struct dma_async_tx_descriptor *
 483__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
 484                      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
 485                      size_t len, unsigned long flags)
 486{
 487        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 488        struct ioat_ring_ent *compl_desc;
 489        struct ioat_ring_ent *desc;
 490        struct ioat_ring_ent *ext;
 491        size_t total_len = len;
 492        struct ioat_xor_descriptor *xor;
 493        struct ioat_xor_ext_descriptor *xor_ex = NULL;
 494        struct ioat_dma_descriptor *hw;
 495        u32 offset = 0;
 496        int num_descs;
 497        int with_ext;
 498        int i;
 499        u16 idx;
 500        u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
 501
 502        BUG_ON(src_cnt < 2);
 503
 504        num_descs = ioat2_xferlen_to_descs(ioat, len);
 505        /* we need 2x the number of descriptors to cover greater than 5
 506         * sources
 507         */
 508        if (src_cnt > 5) {
 509                with_ext = 1;
 510                num_descs *= 2;
 511        } else
 512                with_ext = 0;
 513
 514        /* completion writes from the raid engine may pass completion
 515         * writes from the legacy engine, so we need one extra null
 516         * (legacy) descriptor to ensure all completion writes arrive in
 517         * order.
 518         */
 519        if (likely(num_descs) &&
 520            ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
 521                /* pass */;
 522        else
 523                return NULL;
 524        i = 0;
 525        do {
 526                struct ioat_raw_descriptor *descs[2];
 527                size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 528                int s;
 529
 530                desc = ioat2_get_ring_ent(ioat, idx + i);
 531                xor = desc->xor;
 532
 533                /* save a branch by unconditionally retrieving the
 534                 * extended descriptor xor_set_src() knows to not write
 535                 * to it in the single descriptor case
 536                 */
 537                ext = ioat2_get_ring_ent(ioat, idx + i + 1);
 538                xor_ex = ext->xor_ex;
 539
 540                descs[0] = (struct ioat_raw_descriptor *) xor;
 541                descs[1] = (struct ioat_raw_descriptor *) xor_ex;
 542                for (s = 0; s < src_cnt; s++)
 543                        xor_set_src(descs, src[s], offset, s);
 544                xor->size = xfer_size;
 545                xor->dst_addr = dest + offset;
 546                xor->ctl = 0;
 547                xor->ctl_f.op = op;
 548                xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
 549
 550                len -= xfer_size;
 551                offset += xfer_size;
 552                dump_desc_dbg(ioat, desc);
 553        } while ((i += 1 + with_ext) < num_descs);
 554
 555        /* last xor descriptor carries the unmap parameters and fence bit */
 556        desc->txd.flags = flags;
 557        desc->len = total_len;
 558        if (result)
 559                desc->result = result;
 560        xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 561
 562        /* completion descriptor carries interrupt bit */
 563        compl_desc = ioat2_get_ring_ent(ioat, idx + i);
 564        compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
 565        hw = compl_desc->hw;
 566        hw->ctl = 0;
 567        hw->ctl_f.null = 1;
 568        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 569        hw->ctl_f.compl_write = 1;
 570        hw->size = NULL_DESC_BUFFER_SIZE;
 571        dump_desc_dbg(ioat, compl_desc);
 572
 573        /* we leave the channel locked to ensure in order submission */
 574        return &compl_desc->txd;
 575}
 576
 577static struct dma_async_tx_descriptor *
 578ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
 579               unsigned int src_cnt, size_t len, unsigned long flags)
 580{
 581        return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
 582}
 583
 584struct dma_async_tx_descriptor *
 585ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
 586                    unsigned int src_cnt, size_t len,
 587                    enum sum_check_flags *result, unsigned long flags)
 588{
 589        /* the cleanup routine only sets bits on validate failure, it
 590         * does not clear bits on validate success... so clear it here
 591         */
 592        *result = 0;
 593
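        /* validate has no separate destination: program the first buffer as
         * the descriptor's dst and the remaining buffers as sources
         */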
 594        return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
 595                                     src_cnt - 1, len, flags);
 596}
 597
 598static void
 599dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
 600{
 601        struct device *dev = to_dev(&ioat->base);
 602        struct ioat_pq_descriptor *pq = desc->pq;
 603        struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
 604        struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
 605        int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
 606        int i;
 607
 608        dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
 609                " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
 610                desc_id(desc), (unsigned long long) desc->txd.phys,
 611                (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
 612                desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
 613                pq->ctl_f.compl_write,
 614                pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
 615                pq->ctl_f.src_cnt);
 616        for (i = 0; i < src_cnt; i++)
 617                dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
 618                        (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
 619        dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
 620        dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
 621}
 622
 623static struct dma_async_tx_descriptor *
 624__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 625                     const dma_addr_t *dst, const dma_addr_t *src,
 626                     unsigned int src_cnt, const unsigned char *scf,
 627                     size_t len, unsigned long flags)
 628{
 629        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 630        struct ioat_chan_common *chan = &ioat->base;
 631        struct ioat_ring_ent *compl_desc;
 632        struct ioat_ring_ent *desc;
 633        struct ioat_ring_ent *ext;
 634        size_t total_len = len;
 635        struct ioat_pq_descriptor *pq;
 636        struct ioat_pq_ext_descriptor *pq_ex = NULL;
 637        struct ioat_dma_descriptor *hw;
 638        u32 offset = 0;
 639        int num_descs;
 640        int with_ext;
 641        int i, s;
 642        u16 idx;
 643        u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
 644
 645        dev_dbg(to_dev(chan), "%s\n", __func__);
 646        /* the engine requires at least two sources (we provide
 647         * at least 1 implied source in the DMA_PREP_CONTINUE case)
 648         */
 649        BUG_ON(src_cnt + dmaf_continue(flags) < 2);
 650
 651        num_descs = ioat2_xferlen_to_descs(ioat, len);
 652        /* we need 2x the number of descriptors to cover greater than 3
 653         * sources
 654         */
 655        if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
 656                with_ext = 1;
 657                num_descs *= 2;
 658        } else
 659                with_ext = 0;
 660
 661        /* completion writes from the raid engine may pass completion
 662         * writes from the legacy engine, so we need one extra null
 663         * (legacy) descriptor to ensure all completion writes arrive in
 664         * order.
 665         */
 666        if (likely(num_descs) &&
 667            ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
 668                /* pass */;
 669        else
 670                return NULL;
 671        i = 0;
 672        do {
 673                struct ioat_raw_descriptor *descs[2];
 674                size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 675
 676                desc = ioat2_get_ring_ent(ioat, idx + i);
 677                pq = desc->pq;
 678
 679                /* save a branch by unconditionally retrieving the
 680                 * extended descriptor pq_set_src() knows to not write
 681                 * to it in the single descriptor case
 682                 */
 683                ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
 684                pq_ex = ext->pq_ex;
 685
 686                descs[0] = (struct ioat_raw_descriptor *) pq;
 687                descs[1] = (struct ioat_raw_descriptor *) pq_ex;
 688
 689                for (s = 0; s < src_cnt; s++)
 690                        pq_set_src(descs, src[s], offset, scf[s], s);
 691
 692                /* see the comment for dma_maxpq in include/linux/dmaengine.h */
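                /* each source is xored into p and, scaled by its coefficient,
                 * into q; with p disabled the old q is simply re-added (coef 1),
                 * otherwise q is fed twice (coef 1 then 0) so its p
                 * contributions cancel while the old p is re-added with coef 0
                 */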
 693                if (dmaf_p_disabled_continue(flags))
 694                        pq_set_src(descs, dst[1], offset, 1, s++);
 695                else if (dmaf_continue(flags)) {
 696                        pq_set_src(descs, dst[0], offset, 0, s++);
 697                        pq_set_src(descs, dst[1], offset, 1, s++);
 698                        pq_set_src(descs, dst[1], offset, 0, s++);
 699                }
 700                pq->size = xfer_size;
 701                pq->p_addr = dst[0] + offset;
 702                pq->q_addr = dst[1] + offset;
 703                pq->ctl = 0;
 704                pq->ctl_f.op = op;
 705                pq->ctl_f.src_cnt = src_cnt_to_hw(s);
 706                pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
 707                pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
 708
 709                len -= xfer_size;
 710                offset += xfer_size;
 711        } while ((i += 1 + with_ext) < num_descs);
 712
 713        /* last pq descriptor carries the unmap parameters and fence bit */
 714        desc->txd.flags = flags;
 715        desc->len = total_len;
 716        if (result)
 717                desc->result = result;
 718        pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 719        dump_pq_desc_dbg(ioat, desc, ext);
 720
 721        /* completion descriptor carries interrupt bit */
 722        compl_desc = ioat2_get_ring_ent(ioat, idx + i);
 723        compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
 724        hw = compl_desc->hw;
 725        hw->ctl = 0;
 726        hw->ctl_f.null = 1;
 727        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 728        hw->ctl_f.compl_write = 1;
 729        hw->size = NULL_DESC_BUFFER_SIZE;
 730        dump_desc_dbg(ioat, compl_desc);
 731
 732        /* we leave the channel locked to ensure in order submission */
 733        return &compl_desc->txd;
 734}
 735
 736static struct dma_async_tx_descriptor *
 737ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 738              unsigned int src_cnt, const unsigned char *scf, size_t len,
 739              unsigned long flags)
 740{
 741        /* specify valid address for disabled result */
 742        if (flags & DMA_PREP_PQ_DISABLE_P)
 743                dst[0] = dst[1];
 744        if (flags & DMA_PREP_PQ_DISABLE_Q)
 745                dst[1] = dst[0];
 746
 747        /* handle the single source multiply case from the raid6
 748         * recovery path
 749         */
 750        if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
 751                dma_addr_t single_source[2];
 752                unsigned char single_source_coef[2];
 753
 754                BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
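                /* duplicate the lone source with a zero coefficient to satisfy
                 * the engine's two source minimum without disturbing q
                 */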
 755                single_source[0] = src[0];
 756                single_source[1] = src[0];
 757                single_source_coef[0] = scf[0];
 758                single_source_coef[1] = 0;
 759
 760                return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
 761                                            single_source_coef, len, flags);
 762        } else
 763                return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
 764                                            len, flags);
 765}
 766
 767struct dma_async_tx_descriptor *
 768ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 769                  unsigned int src_cnt, const unsigned char *scf, size_t len,
 770                  enum sum_check_flags *pqres, unsigned long flags)
 771{
 772        /* specify valid address for disabled result */
 773        if (flags & DMA_PREP_PQ_DISABLE_P)
 774                pq[0] = pq[1];
 775        if (flags & DMA_PREP_PQ_DISABLE_Q)
 776                pq[1] = pq[0];
 777
 778        /* the cleanup routine only sets bits on validate failure, it
 779         * does not clear bits on validate success... so clear it here
 780         */
 781        *pqres = 0;
 782
 783        return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
 784                                    flags);
 785}
 786
 787static struct dma_async_tx_descriptor *
 788ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 789                 unsigned int src_cnt, size_t len, unsigned long flags)
 790{
 791        unsigned char scf[src_cnt];
 792        dma_addr_t pq[2];
 793
 794        memset(scf, 0, src_cnt);
 795        pq[0] = dst;
 796        flags |= DMA_PREP_PQ_DISABLE_Q;
 797        pq[1] = dst; /* specify valid address for disabled result */
 798
 799        return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
 800                                    flags);
 801}
 802
 803struct dma_async_tx_descriptor *
 804ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
 805                     unsigned int src_cnt, size_t len,
 806                     enum sum_check_flags *result, unsigned long flags)
 807{
 808        unsigned char scf[src_cnt];
 809        dma_addr_t pq[2];
 810
 811        /* the cleanup routine only sets bits on validate failure, it
 812         * does not clear bits on validate success... so clear it here
 813         */
 814        *result = 0;
 815
 816        memset(scf, 0, src_cnt);
 817        pq[0] = src[0];
 818        flags |= DMA_PREP_PQ_DISABLE_Q;
 819        pq[1] = pq[0]; /* specify valid address for disabled result */
 820
 821        return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
 822                                    len, flags);
 823}
 824
 825static struct dma_async_tx_descriptor *
 826ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
 827{
 828        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 829        struct ioat_ring_ent *desc;
 830        struct ioat_dma_descriptor *hw;
 831        u16 idx;
 832
 833        if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
 834                desc = ioat2_get_ring_ent(ioat, idx);
 835        else
 836                return NULL;
 837
 838        hw = desc->hw;
 839        hw->ctl = 0;
 840        hw->ctl_f.null = 1;
 841        hw->ctl_f.int_en = 1;
 842        hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 843        hw->ctl_f.compl_write = 1;
 844        hw->size = NULL_DESC_BUFFER_SIZE;
 845        hw->src_addr = 0;
 846        hw->dst_addr = 0;
 847
 848        desc->txd.flags = flags;
 849        desc->len = 1;
 850
 851        dump_desc_dbg(ioat, desc);
 852
 853        /* we leave the channel locked to ensure in order submission */
 854        return &desc->txd;
 855}
 856
 857static void __devinit ioat3_dma_test_callback(void *dma_async_param)
 858{
 859        struct completion *cmp = dma_async_param;
 860
 861        complete(cmp);
 862}
 863
 864#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
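/* probe-time sanity check of the raid engines: xor six pages into a
 * destination, validate the expected parity, memset the result back to zero,
 * then confirm that validate flags the now-broken parity sum
 */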
 865static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
 866{
 867        int i, src_idx;
 868        struct page *dest;
 869        struct page *xor_srcs[IOAT_NUM_SRC_TEST];
 870        struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
 871        dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
 872        dma_addr_t dma_addr, dest_dma;
 873        struct dma_async_tx_descriptor *tx;
 874        struct dma_chan *dma_chan;
 875        dma_cookie_t cookie;
 876        u8 cmp_byte = 0;
 877        u32 cmp_word;
 878        u32 xor_val_result;
 879        int err = 0;
 880        struct completion cmp;
 881        unsigned long tmo;
 882        struct device *dev = &device->pdev->dev;
 883        struct dma_device *dma = &device->common;
 884
 885        dev_dbg(dev, "%s\n", __func__);
 886
 887        if (!dma_has_cap(DMA_XOR, dma->cap_mask))
 888                return 0;
 889
 890        for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
 891                xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
 892                if (!xor_srcs[src_idx]) {
 893                        while (src_idx--)
 894                                __free_page(xor_srcs[src_idx]);
 895                        return -ENOMEM;
 896                }
 897        }
 898
 899        dest = alloc_page(GFP_KERNEL);
 900        if (!dest) {
 901                while (src_idx--)
 902                        __free_page(xor_srcs[src_idx]);
 903                return -ENOMEM;
 904        }
 905
 906        /* Fill in src buffers */
 907        for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
 908                u8 *ptr = page_address(xor_srcs[src_idx]);
 909                for (i = 0; i < PAGE_SIZE; i++)
 910                        ptr[i] = (1 << src_idx);
 911        }
 912
 913        for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
 914                cmp_byte ^= (u8) (1 << src_idx);
 915
 916        cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
 917                        (cmp_byte << 8) | cmp_byte;
 918
 919        memset(page_address(dest), 0, PAGE_SIZE);
 920
 921        dma_chan = container_of(dma->channels.next, struct dma_chan,
 922                                device_node);
 923        if (dma->device_alloc_chan_resources(dma_chan) < 1) {
 924                err = -ENODEV;
 925                goto out;
 926        }
 927
 928        /* test xor */
 929        dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
 930        for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 931                dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
 932                                           DMA_TO_DEVICE);
 933        tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 934                                      IOAT_NUM_SRC_TEST, PAGE_SIZE,
 935                                      DMA_PREP_INTERRUPT);
 936
 937        if (!tx) {
 938                dev_err(dev, "Self-test xor prep failed\n");
 939                err = -ENODEV;
 940                goto free_resources;
 941        }
 942
 943        async_tx_ack(tx);
 944        init_completion(&cmp);
 945        tx->callback = ioat3_dma_test_callback;
 946        tx->callback_param = &cmp;
 947        cookie = tx->tx_submit(tx);
 948        if (cookie < 0) {
 949                dev_err(dev, "Self-test xor setup failed\n");
 950                err = -ENODEV;
 951                goto free_resources;
 952        }
 953        dma->device_issue_pending(dma_chan);
 954
 955        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 956
 957        if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
 958                dev_err(dev, "Self-test xor timed out\n");
 959                err = -ENODEV;
 960                goto free_resources;
 961        }
 962
 963        dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 964        for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
 965                u32 *ptr = page_address(dest);
 966                if (ptr[i] != cmp_word) {
 967                        dev_err(dev, "Self-test xor failed compare\n");
 968                        err = -ENODEV;
 969                        goto free_resources;
 970                }
 971        }
 972        dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
 973
 974        /* skip validate if the capability is not present */
 975        if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
 976                goto free_resources;
 977
  978        /* validate the sources with the destination page */
 979        for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 980                xor_val_srcs[i] = xor_srcs[i];
 981        xor_val_srcs[i] = dest;
 982
 983        xor_val_result = 1;
 984
 985        for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
 986                dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
 987                                           DMA_TO_DEVICE);
 988        tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 989                                          IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
 990                                          &xor_val_result, DMA_PREP_INTERRUPT);
 991        if (!tx) {
 992                dev_err(dev, "Self-test zero prep failed\n");
 993                err = -ENODEV;
 994                goto free_resources;
 995        }
 996
 997        async_tx_ack(tx);
 998        init_completion(&cmp);
 999        tx->callback = ioat3_dma_test_callback;
1000        tx->callback_param = &cmp;
1001        cookie = tx->tx_submit(tx);
1002        if (cookie < 0) {
1003                dev_err(dev, "Self-test zero setup failed\n");
1004                err = -ENODEV;
1005                goto free_resources;
1006        }
1007        dma->device_issue_pending(dma_chan);
1008
1009        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1010
1011        if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1012                dev_err(dev, "Self-test validate timed out\n");
1013                err = -ENODEV;
1014                goto free_resources;
1015        }
1016
1017        if (xor_val_result != 0) {
1018                dev_err(dev, "Self-test validate failed compare\n");
1019                err = -ENODEV;
1020                goto free_resources;
1021        }
1022
1023        /* skip memset if the capability is not present */
1024        if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1025                goto free_resources;
1026
1027        /* test memset */
1028        dma_addr = dma_map_page(dev, dest, 0,
1029                        PAGE_SIZE, DMA_FROM_DEVICE);
1030        tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1031                                         DMA_PREP_INTERRUPT);
1032        if (!tx) {
1033                dev_err(dev, "Self-test memset prep failed\n");
1034                err = -ENODEV;
1035                goto free_resources;
1036        }
1037
1038        async_tx_ack(tx);
1039        init_completion(&cmp);
1040        tx->callback = ioat3_dma_test_callback;
1041        tx->callback_param = &cmp;
1042        cookie = tx->tx_submit(tx);
1043        if (cookie < 0) {
1044                dev_err(dev, "Self-test memset setup failed\n");
1045                err = -ENODEV;
1046                goto free_resources;
1047        }
1048        dma->device_issue_pending(dma_chan);
1049
1050        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1051
1052        if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1053                dev_err(dev, "Self-test memset timed out\n");
1054                err = -ENODEV;
1055                goto free_resources;
1056        }
1057
1058        for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1059                u32 *ptr = page_address(dest);
1060                if (ptr[i]) {
1061                        dev_err(dev, "Self-test memset failed compare\n");
1062                        err = -ENODEV;
1063                        goto free_resources;
1064                }
1065        }
1066
1067        /* test for non-zero parity sum */
1068        xor_val_result = 0;
1069        for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1070                dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1071                                           DMA_TO_DEVICE);
1072        tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1073                                          IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1074                                          &xor_val_result, DMA_PREP_INTERRUPT);
1075        if (!tx) {
1076                dev_err(dev, "Self-test 2nd zero prep failed\n");
1077                err = -ENODEV;
1078                goto free_resources;
1079        }
1080
1081        async_tx_ack(tx);
1082        init_completion(&cmp);
1083        tx->callback = ioat3_dma_test_callback;
1084        tx->callback_param = &cmp;
1085        cookie = tx->tx_submit(tx);
1086        if (cookie < 0) {
 1087                dev_err(dev, "Self-test 2nd zero setup failed\n");
1088                err = -ENODEV;
1089                goto free_resources;
1090        }
1091        dma->device_issue_pending(dma_chan);
1092
1093        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1094
1095        if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1096                dev_err(dev, "Self-test 2nd validate timed out\n");
1097                err = -ENODEV;
1098                goto free_resources;
1099        }
1100
1101        if (xor_val_result != SUM_CHECK_P_RESULT) {
 1102                dev_err(dev, "Self-test 2nd validate failed compare\n");
1103                err = -ENODEV;
1104                goto free_resources;
1105        }
1106
1107free_resources:
1108        dma->device_free_chan_resources(dma_chan);
1109out:
1110        src_idx = IOAT_NUM_SRC_TEST;
1111        while (src_idx--)
1112                __free_page(xor_srcs[src_idx]);
1113        __free_page(dest);
1114        return err;
1115}
1116
1117static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
1118{
1119        int rc = ioat_dma_self_test(device);
1120
1121        if (rc)
1122                return rc;
1123
1124        rc = ioat_xor_val_self_test(device);
1125        if (rc)
1126                return rc;
1127
1128        return 0;
1129}
1130
1131int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1132{
1133        struct pci_dev *pdev = device->pdev;
1134        int dca_en = system_has_dca_enabled(pdev);
1135        struct dma_device *dma;
1136        struct dma_chan *c;
1137        struct ioat_chan_common *chan;
1138        bool is_raid_device = false;
1139        int err;
1140        u16 dev_id;
1141        u32 cap;
1142
1143        device->enumerate_channels = ioat2_enumerate_channels;
1144        device->self_test = ioat3_dma_self_test;
1145        dma = &device->common;
1146        dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1147        dma->device_issue_pending = ioat2_issue_pending;
1148        dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1149        dma->device_free_chan_resources = ioat2_free_chan_resources;
1150
1151        dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1152        dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1153
1154        cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1155
1156        /* dca is incompatible with raid operations */
1157        if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
1158                cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
1159
1160        if (cap & IOAT_CAP_XOR) {
1161                is_raid_device = true;
1162                dma->max_xor = 8;
1163                dma->xor_align = 2;
1164
1165                dma_cap_set(DMA_XOR, dma->cap_mask);
1166                dma->device_prep_dma_xor = ioat3_prep_xor;
1167
1168                dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1169                dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1170        }
1171        if (cap & IOAT_CAP_PQ) {
1172                is_raid_device = true;
1173                dma_set_maxpq(dma, 8, 0);
1174                dma->pq_align = 2;
1175
1176                dma_cap_set(DMA_PQ, dma->cap_mask);
1177                dma->device_prep_dma_pq = ioat3_prep_pq;
1178
1179                dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1180                dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1181
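                /* no dedicated xor engine: service xor requests through the pq
                 * engine with zero coefficients and the q result disabled
                 */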
1182                if (!(cap & IOAT_CAP_XOR)) {
1183                        dma->max_xor = 8;
1184                        dma->xor_align = 2;
1185
1186                        dma_cap_set(DMA_XOR, dma->cap_mask);
1187                        dma->device_prep_dma_xor = ioat3_prep_pqxor;
1188
1189                        dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1190                        dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1191                }
1192        }
1193        if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
1194                dma_cap_set(DMA_MEMSET, dma->cap_mask);
1195                dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1196        }
1197
1198
1199        if (is_raid_device) {
1200                dma->device_is_tx_complete = ioat3_is_complete;
1201                device->cleanup_tasklet = ioat3_cleanup_tasklet;
1202                device->timer_fn = ioat3_timer_event;
1203        } else {
1204                dma->device_is_tx_complete = ioat2_is_complete;
1205                device->cleanup_tasklet = ioat2_cleanup_tasklet;
1206                device->timer_fn = ioat2_timer_event;
1207        }
1208
1209        #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
1210        dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
1211        dma->device_prep_dma_pq_val = NULL;
1212        #endif
1213
1214        #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
1215        dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
1216        dma->device_prep_dma_xor_val = NULL;
1217        #endif
1218
1219        /* -= IOAT ver.3 workarounds =- */
1220        /* Write CHANERRMSK_INT with 3E07h to mask out the errors
1221         * that can cause stability issues for IOAT ver.3
1222         */
1223        pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
1224
1225        /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1226         * (workaround for spurious config parity error after restart)
1227         */
1228        pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1229        if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
1230                pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
1231
1232        err = ioat_probe(device);
1233        if (err)
1234                return err;
1235        ioat_set_tcp_copy_break(262144);
1236
1237        list_for_each_entry(c, &dma->channels, device_node) {
1238                chan = to_chan_common(c);
1239                writel(IOAT_DMA_DCA_ANY_CPU,
1240                       chan->reg_base + IOAT_DCACTRL_OFFSET);
1241        }
1242
1243        err = ioat_register(device);
1244        if (err)
1245                return err;
1246
1247        ioat_kobject_add(device, &ioat2_ktype);
1248
1249        if (dca)
1250                device->dca = ioat3_dca_init(pdev, device->reg_base);
1251
1252        return 0;
1253}
1254