linux/drivers/dma/ioat/dma_v3.c
   1/*
   2 * This file is provided under a dual BSD/GPLv2 license.  When using or
   3 * redistributing this file, you may do so under either license.
   4 *
   5 * GPL LICENSE SUMMARY
   6 *
   7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
   8 *
   9 * This program is free software; you can redistribute it and/or modify it
  10 * under the terms and conditions of the GNU General Public License,
  11 * version 2, as published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but WITHOUT
  14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  16 * more details.
  17 *
  18 * You should have received a copy of the GNU General Public License along with
  19 * this program; if not, write to the Free Software Foundation, Inc.,
  20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  21 *
  22 * The full GNU General Public License is included in this distribution in
  23 * the file called "COPYING".
  24 *
  25 * BSD LICENSE
  26 *
  27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
  28 *
  29 * Redistribution and use in source and binary forms, with or without
  30 * modification, are permitted provided that the following conditions are met:
  31 *
  32 *   * Redistributions of source code must retain the above copyright
  33 *     notice, this list of conditions and the following disclaimer.
  34 *   * Redistributions in binary form must reproduce the above copyright
  35 *     notice, this list of conditions and the following disclaimer in
  36 *     the documentation and/or other materials provided with the
  37 *     distribution.
  38 *   * Neither the name of Intel Corporation nor the names of its
  39 *     contributors may be used to endorse or promote products derived
  40 *     from this software without specific prior written permission.
  41 *
  42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  52 * POSSIBILITY OF SUCH DAMAGE.
  53 */
  54
  55/*
  56 * Support routines for v3+ hardware
  57 */
  58
  59#include <linux/pci.h>
  60#include <linux/gfp.h>
  61#include <linux/dmaengine.h>
  62#include <linux/dma-mapping.h>
  63#include <linux/prefetch.h>
  64#include "../dmaengine.h"
  65#include "registers.h"
  66#include "hw.h"
  67#include "dma.h"
  68#include "dma_v2.h"
  69
  70/* ioat hardware assumes at least two sources for raid operations */
  71#define src_cnt_to_sw(x) ((x) + 2)
  72#define src_cnt_to_hw(x) ((x) - 2)
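     /* e.g. a hardware src_cnt of 3 describes five software sources, and an
      * eight-source operation is programmed into its descriptor as 6
      */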
  73
  74/* provide a lookup table for setting the source address in the base or
  75 * extended descriptor of an xor or pq descriptor
  76 */
  77static const u8 xor_idx_to_desc = 0xe0;
  78static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
  79static const u8 pq_idx_to_desc = 0xf8;
  80static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
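     /* each bit of a _to_desc mask selects the base (0) or extended (1)
      * descriptor for source 'idx', and the matching _to_field table gives
      * the u64 slot within that descriptor: xor sources 0-4 and pq sources
      * 0-2 fit in the base descriptor, the rest spill into the extension
      */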
  81
  82static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
  83{
  84        struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
  85
  86        return raw->field[xor_idx_to_field[idx]];
  87}
  88
  89static void xor_set_src(struct ioat_raw_descriptor *descs[2],
  90                        dma_addr_t addr, u32 offset, int idx)
  91{
  92        struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
  93
  94        raw->field[xor_idx_to_field[idx]] = addr + offset;
  95}
  96
  97static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
  98{
  99        struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
 100
 101        return raw->field[pq_idx_to_field[idx]];
 102}
 103
 104static void pq_set_src(struct ioat_raw_descriptor *descs[2],
 105                       dma_addr_t addr, u32 offset, u8 coef, int idx)
 106{
 107        struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
 108        struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
 109
 110        raw->field[pq_idx_to_field[idx]] = addr + offset;
 111        pq->coef[idx] = coef;
 112}
 113
 114static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 115                            struct ioat_ring_ent *desc, int idx)
 116{
 117        struct ioat_chan_common *chan = &ioat->base;
 118        struct pci_dev *pdev = chan->device->pdev;
 119        size_t len = desc->len;
 120        size_t offset = len - desc->hw->size;
 121        struct dma_async_tx_descriptor *tx = &desc->txd;
 122        enum dma_ctrl_flags flags = tx->flags;
 123
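             /* the last descriptor of an operation carries the unmap
              * parameters: desc->len is the total length while desc->hw->size
              * covers only the final chunk, so 'offset' rewinds the per-chunk
              * addresses back to the start of the mapped buffers
              */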
 124        switch (desc->hw->ctl_f.op) {
 125        case IOAT_OP_COPY:
 126                if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
 127                        ioat_dma_unmap(chan, flags, len, desc->hw);
 128                break;
 129        case IOAT_OP_FILL: {
 130                struct ioat_fill_descriptor *hw = desc->fill;
 131
 132                if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
 133                        ioat_unmap(pdev, hw->dst_addr - offset, len,
 134                                   PCI_DMA_FROMDEVICE, flags, 1);
 135                break;
 136        }
 137        case IOAT_OP_XOR_VAL:
 138        case IOAT_OP_XOR: {
 139                struct ioat_xor_descriptor *xor = desc->xor;
 140                struct ioat_ring_ent *ext;
 141                struct ioat_xor_ext_descriptor *xor_ex = NULL;
 142                int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
 143                struct ioat_raw_descriptor *descs[2];
 144                int i;
 145
 146                if (src_cnt > 5) {
 147                        ext = ioat2_get_ring_ent(ioat, idx + 1);
 148                        xor_ex = ext->xor_ex;
 149                }
 150
 151                if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 152                        descs[0] = (struct ioat_raw_descriptor *) xor;
 153                        descs[1] = (struct ioat_raw_descriptor *) xor_ex;
 154                        for (i = 0; i < src_cnt; i++) {
 155                                dma_addr_t src = xor_get_src(descs, i);
 156
 157                                ioat_unmap(pdev, src - offset, len,
 158                                           PCI_DMA_TODEVICE, flags, 0);
 159                        }
 160
 161                        /* dest is a source in xor validate operations */
 162                        if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
 163                                ioat_unmap(pdev, xor->dst_addr - offset, len,
 164                                           PCI_DMA_TODEVICE, flags, 1);
 165                                break;
 166                        }
 167                }
 168
 169                if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
 170                        ioat_unmap(pdev, xor->dst_addr - offset, len,
 171                                   PCI_DMA_FROMDEVICE, flags, 1);
 172                break;
 173        }
 174        case IOAT_OP_PQ_VAL:
 175        case IOAT_OP_PQ: {
 176                struct ioat_pq_descriptor *pq = desc->pq;
 177                struct ioat_ring_ent *ext;
 178                struct ioat_pq_ext_descriptor *pq_ex = NULL;
 179                int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
 180                struct ioat_raw_descriptor *descs[2];
 181                int i;
 182
 183                if (src_cnt > 3) {
 184                        ext = ioat2_get_ring_ent(ioat, idx + 1);
 185                        pq_ex = ext->pq_ex;
 186                }
 187
 188                /* in the 'continue' case don't unmap the dests as sources */
 189                if (dmaf_p_disabled_continue(flags))
 190                        src_cnt--;
 191                else if (dmaf_continue(flags))
 192                        src_cnt -= 3;
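                     /* these adjustments mirror the implied sources appended
                      * by __ioat3_prep_pq_lock(): one (q) for a q-only
                      * continuation, three (p, q, q) for a p+q continuation
                      */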
 193
 194                if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 195                        descs[0] = (struct ioat_raw_descriptor *) pq;
 196                        descs[1] = (struct ioat_raw_descriptor *) pq_ex;
 197                        for (i = 0; i < src_cnt; i++) {
 198                                dma_addr_t src = pq_get_src(descs, i);
 199
 200                                ioat_unmap(pdev, src - offset, len,
 201                                           PCI_DMA_TODEVICE, flags, 0);
 202                        }
 203
 204                        /* the dests are sources in pq validate operations */
  205                        if (pq->ctl_f.op == IOAT_OP_PQ_VAL) {
 206                                if (!(flags & DMA_PREP_PQ_DISABLE_P))
 207                                        ioat_unmap(pdev, pq->p_addr - offset,
 208                                                   len, PCI_DMA_TODEVICE, flags, 0);
 209                                if (!(flags & DMA_PREP_PQ_DISABLE_Q))
 210                                        ioat_unmap(pdev, pq->q_addr - offset,
 211                                                   len, PCI_DMA_TODEVICE, flags, 0);
 212                                break;
 213                        }
 214                }
 215
 216                if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
 217                        if (!(flags & DMA_PREP_PQ_DISABLE_P))
 218                                ioat_unmap(pdev, pq->p_addr - offset, len,
 219                                           PCI_DMA_BIDIRECTIONAL, flags, 1);
 220                        if (!(flags & DMA_PREP_PQ_DISABLE_Q))
 221                                ioat_unmap(pdev, pq->q_addr - offset, len,
 222                                           PCI_DMA_BIDIRECTIONAL, flags, 1);
 223                }
 224                break;
 225        }
 226        default:
 227                dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
 228                        __func__, desc->hw->ctl_f.op);
 229        }
 230}
 231
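     /* an operation spills into a second, extended descriptor when its
      * sources exceed what the base descriptor holds: five for xor, three
      * for pq
      */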
 232static bool desc_has_ext(struct ioat_ring_ent *desc)
 233{
 234        struct ioat_dma_descriptor *hw = desc->hw;
 235
 236        if (hw->ctl_f.op == IOAT_OP_XOR ||
 237            hw->ctl_f.op == IOAT_OP_XOR_VAL) {
 238                struct ioat_xor_descriptor *xor = desc->xor;
 239
 240                if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
 241                        return true;
 242        } else if (hw->ctl_f.op == IOAT_OP_PQ ||
 243                   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
 244                struct ioat_pq_descriptor *pq = desc->pq;
 245
 246                if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
 247                        return true;
 248        }
 249
 250        return false;
 251}
 252
 253/**
 254 * __cleanup - reclaim used descriptors
 255 * @ioat: channel (ring) to clean
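      * @phys_complete: last completion address reported by the hardware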
 256 *
 257 * The difference from the dma_v2.c __cleanup() is that this routine
 258 * handles extended descriptors and dma-unmapping raid operations.
 259 */
 260static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 261{
 262        struct ioat_chan_common *chan = &ioat->base;
 263        struct ioat_ring_ent *desc;
 264        bool seen_current = false;
 265        int idx = ioat->tail, i;
 266        u16 active;
 267
 268        dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
 269                __func__, ioat->head, ioat->tail, ioat->issued);
 270
 271        active = ioat2_ring_active(ioat);
 272        for (i = 0; i < active && !seen_current; i++) {
 273                struct dma_async_tx_descriptor *tx;
 274
 275                smp_read_barrier_depends();
 276                prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
 277                desc = ioat2_get_ring_ent(ioat, idx + i);
 278                dump_desc_dbg(ioat, desc);
 279                tx = &desc->txd;
 280                if (tx->cookie) {
 281                        dma_cookie_complete(tx);
 282                        ioat3_dma_unmap(ioat, desc, idx + i);
 283                        if (tx->callback) {
 284                                tx->callback(tx->callback_param);
 285                                tx->callback = NULL;
 286                        }
 287                }
 288
 289                if (tx->phys == phys_complete)
 290                        seen_current = true;
 291
 292                /* skip extended descriptors */
 293                if (desc_has_ext(desc)) {
 294                        BUG_ON(i + 1 >= active);
 295                        i++;
 296                }
 297        }
 298        smp_mb(); /* finish all descriptor reads before incrementing tail */
 299        ioat->tail = idx + i;
 300        BUG_ON(active && !seen_current); /* no active descs have written a completion? */
 301        chan->last_completion = phys_complete;
 302
 303        if (active - i == 0) {
 304                dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
 305                        __func__);
 306                clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
 307                mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 308        }
 309        /* 5 microsecond delay per pending descriptor */
 310        writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
 311               chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
 312}
 313
 314static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
 315{
 316        struct ioat_chan_common *chan = &ioat->base;
 317        dma_addr_t phys_complete;
 318
 319        spin_lock_bh(&chan->cleanup_lock);
 320        if (ioat_cleanup_preamble(chan, &phys_complete))
 321                __cleanup(ioat, phys_complete);
 322        spin_unlock_bh(&chan->cleanup_lock);
 323}
 324
 325static void ioat3_cleanup_event(unsigned long data)
 326{
 327        struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 328
 329        ioat3_cleanup(ioat);
 330        writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 331}
 332
 333static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
 334{
 335        struct ioat_chan_common *chan = &ioat->base;
 336        dma_addr_t phys_complete;
 337
 338        ioat2_quiesce(chan, 0);
 339        if (ioat_cleanup_preamble(chan, &phys_complete))
 340                __cleanup(ioat, phys_complete);
 341
 342        __ioat2_restart_chan(ioat);
 343}
 344
 345static void check_active(struct ioat2_dma_chan *ioat)
 346{
 347        struct ioat_chan_common *chan = &ioat->base;
 348
 349        if (ioat2_ring_active(ioat)) {
 350                mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 351                return;
 352        }
 353
 354        if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state))
 355                mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 356        else if (ioat->alloc_order > ioat_get_alloc_order()) {
  357                /* if the ring is idle, empty, and oversized, try to step
 358                 * down the size
 359                 */
 360                reshape_ring(ioat, ioat->alloc_order - 1);
 361
 362                /* keep shrinking until we get back to our minimum
 363                 * default size
 364                 */
 365                if (ioat->alloc_order > ioat_get_alloc_order())
 366                        mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 367        }
 368
 369}
 370
 371static void ioat3_timer_event(unsigned long data)
 372{
 373        struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 374        struct ioat_chan_common *chan = &ioat->base;
 375        dma_addr_t phys_complete;
 376        u64 status;
 377
 378        status = ioat_chansts(chan);
 379
 380        /* when halted due to errors check for channel
 381         * programming errors before advancing the completion state
 382         */
 383        if (is_ioat_halted(status)) {
 384                u32 chanerr;
 385
 386                chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
 387                dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
 388                        __func__, chanerr);
 389                if (test_bit(IOAT_RUN, &chan->state))
 390                        BUG_ON(is_ioat_bug(chanerr));
 391                else /* we never got off the ground */
 392                        return;
 393        }
 394
 395        /* if we haven't made progress and we have already
 396         * acknowledged a pending completion once, then be more
 397         * forceful with a restart
 398         */
 399        spin_lock_bh(&chan->cleanup_lock);
 400        if (ioat_cleanup_preamble(chan, &phys_complete))
 401                __cleanup(ioat, phys_complete);
 402        else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
 403                spin_lock_bh(&ioat->prep_lock);
 404                ioat3_restart_channel(ioat);
 405                spin_unlock_bh(&ioat->prep_lock);
 406                spin_unlock_bh(&chan->cleanup_lock);
 407                return;
 408        } else {
 409                set_bit(IOAT_COMPLETION_ACK, &chan->state);
 410                mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 411        }
  412
 414        if (ioat2_ring_active(ioat))
 415                mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 416        else {
 417                spin_lock_bh(&ioat->prep_lock);
 418                check_active(ioat);
 419                spin_unlock_bh(&ioat->prep_lock);
 420        }
 421        spin_unlock_bh(&chan->cleanup_lock);
 422}
 423
 424static enum dma_status
 425ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 426                struct dma_tx_state *txstate)
 427{
 428        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 429        enum dma_status ret;
 430
 431        ret = dma_cookie_status(c, cookie, txstate);
 432        if (ret == DMA_SUCCESS)
 433                return ret;
 434
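             /* not complete yet: reap finished descriptors, then re-check */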
 435        ioat3_cleanup(ioat);
 436
 437        return dma_cookie_status(c, cookie, txstate);
 438}
 439
 440static struct dma_async_tx_descriptor *
 441ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
 442                       size_t len, unsigned long flags)
 443{
 444        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 445        struct ioat_ring_ent *desc;
 446        size_t total_len = len;
 447        struct ioat_fill_descriptor *fill;
 448        u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
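             /* replicate the fill byte across all eight bytes of the
              * descriptor's source-data word
              */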
 449        int num_descs, idx, i;
 450
 451        num_descs = ioat2_xferlen_to_descs(ioat, len);
 452        if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
 453                idx = ioat->head;
 454        else
 455                return NULL;
 456        i = 0;
 457        do {
 458                size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 459
 460                desc = ioat2_get_ring_ent(ioat, idx + i);
 461                fill = desc->fill;
 462
 463                fill->size = xfer_size;
 464                fill->src_data = src_data;
 465                fill->dst_addr = dest;
 466                fill->ctl = 0;
 467                fill->ctl_f.op = IOAT_OP_FILL;
 468
 469                len -= xfer_size;
 470                dest += xfer_size;
 471                dump_desc_dbg(ioat, desc);
 472        } while (++i < num_descs);
 473
 474        desc->txd.flags = flags;
 475        desc->len = total_len;
 476        fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 477        fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 478        fill->ctl_f.compl_write = 1;
 479        dump_desc_dbg(ioat, desc);
 480
 481        /* we leave the channel locked to ensure in order submission */
 482        return &desc->txd;
 483}
 484
 485static struct dma_async_tx_descriptor *
 486__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
 487                      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
 488                      size_t len, unsigned long flags)
 489{
 490        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 491        struct ioat_ring_ent *compl_desc;
 492        struct ioat_ring_ent *desc;
 493        struct ioat_ring_ent *ext;
 494        size_t total_len = len;
 495        struct ioat_xor_descriptor *xor;
 496        struct ioat_xor_ext_descriptor *xor_ex = NULL;
 497        struct ioat_dma_descriptor *hw;
 498        int num_descs, with_ext, idx, i;
 499        u32 offset = 0;
 500        u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
 501
 502        BUG_ON(src_cnt < 2);
 503
 504        num_descs = ioat2_xferlen_to_descs(ioat, len);
 505        /* we need 2x the number of descriptors to cover greater than 5
 506         * sources
 507         */
 508        if (src_cnt > 5) {
 509                with_ext = 1;
 510                num_descs *= 2;
 511        } else
 512                with_ext = 0;
 513
 514        /* completion writes from the raid engine may pass completion
 515         * writes from the legacy engine, so we need one extra null
 516         * (legacy) descriptor to ensure all completion writes arrive in
 517         * order.
 518         */
 519        if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
 520                idx = ioat->head;
 521        else
 522                return NULL;
 523        i = 0;
 524        do {
 525                struct ioat_raw_descriptor *descs[2];
 526                size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 527                int s;
 528
 529                desc = ioat2_get_ring_ent(ioat, idx + i);
 530                xor = desc->xor;
 531
 532                /* save a branch by unconditionally retrieving the
  533                 * extended descriptor; xor_set_src() knows not to write
 534                 * to it in the single descriptor case
 535                 */
 536                ext = ioat2_get_ring_ent(ioat, idx + i + 1);
 537                xor_ex = ext->xor_ex;
 538
 539                descs[0] = (struct ioat_raw_descriptor *) xor;
 540                descs[1] = (struct ioat_raw_descriptor *) xor_ex;
 541                for (s = 0; s < src_cnt; s++)
 542                        xor_set_src(descs, src[s], offset, s);
 543                xor->size = xfer_size;
 544                xor->dst_addr = dest + offset;
 545                xor->ctl = 0;
 546                xor->ctl_f.op = op;
 547                xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
 548
 549                len -= xfer_size;
 550                offset += xfer_size;
 551                dump_desc_dbg(ioat, desc);
 552        } while ((i += 1 + with_ext) < num_descs);
 553
 554        /* last xor descriptor carries the unmap parameters and fence bit */
 555        desc->txd.flags = flags;
 556        desc->len = total_len;
 557        if (result)
 558                desc->result = result;
 559        xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 560
 561        /* completion descriptor carries interrupt bit */
 562        compl_desc = ioat2_get_ring_ent(ioat, idx + i);
 563        compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
 564        hw = compl_desc->hw;
 565        hw->ctl = 0;
 566        hw->ctl_f.null = 1;
 567        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 568        hw->ctl_f.compl_write = 1;
 569        hw->size = NULL_DESC_BUFFER_SIZE;
 570        dump_desc_dbg(ioat, compl_desc);
 571
 572        /* we leave the channel locked to ensure in order submission */
 573        return &compl_desc->txd;
 574}
 575
 576static struct dma_async_tx_descriptor *
 577ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
 578               unsigned int src_cnt, size_t len, unsigned long flags)
 579{
 580        return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
 581}
 582
 583struct dma_async_tx_descriptor *
 584ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
 585                    unsigned int src_cnt, size_t len,
 586                    enum sum_check_flags *result, unsigned long flags)
 587{
  588        /* the cleanup routine only sets bits on validate failure; it
 589         * does not clear bits on validate success... so clear it here
 590         */
 591        *result = 0;
 592
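             /* src[0] is handed to the engine as the xor-validate 'dest';
              * the unmap path treats that dest as just another source
              */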
 593        return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
 594                                     src_cnt - 1, len, flags);
 595}
 596
 597static void
 598dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
 599{
 600        struct device *dev = to_dev(&ioat->base);
 601        struct ioat_pq_descriptor *pq = desc->pq;
 602        struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
 603        struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
 604        int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
 605        int i;
 606
 607        dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
 608                " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
 609                desc_id(desc), (unsigned long long) desc->txd.phys,
 610                (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
 611                desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
 612                pq->ctl_f.compl_write,
 613                pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
 614                pq->ctl_f.src_cnt);
 615        for (i = 0; i < src_cnt; i++)
 616                dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
 617                        (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
 618        dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
 619        dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
 620}
 621
 622static struct dma_async_tx_descriptor *
 623__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 624                     const dma_addr_t *dst, const dma_addr_t *src,
 625                     unsigned int src_cnt, const unsigned char *scf,
 626                     size_t len, unsigned long flags)
 627{
 628        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 629        struct ioat_chan_common *chan = &ioat->base;
 630        struct ioat_ring_ent *compl_desc;
 631        struct ioat_ring_ent *desc;
 632        struct ioat_ring_ent *ext;
 633        size_t total_len = len;
 634        struct ioat_pq_descriptor *pq;
 635        struct ioat_pq_ext_descriptor *pq_ex = NULL;
 636        struct ioat_dma_descriptor *hw;
 637        u32 offset = 0;
 638        u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
 639        int i, s, idx, with_ext, num_descs;
 640
 641        dev_dbg(to_dev(chan), "%s\n", __func__);
 642        /* the engine requires at least two sources (we provide
 643         * at least 1 implied source in the DMA_PREP_CONTINUE case)
 644         */
 645        BUG_ON(src_cnt + dmaf_continue(flags) < 2);
 646
 647        num_descs = ioat2_xferlen_to_descs(ioat, len);
 648        /* we need 2x the number of descriptors to cover greater than 3
 649         * sources (we need 1 extra source in the q-only continuation
  650                 * case and 3 extra sources in the p+q continuation case).
 651         */
 652        if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
 653            (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
 654                with_ext = 1;
 655                num_descs *= 2;
 656        } else
 657                with_ext = 0;
 658
 659        /* completion writes from the raid engine may pass completion
 660         * writes from the legacy engine, so we need one extra null
 661         * (legacy) descriptor to ensure all completion writes arrive in
 662         * order.
 663         */
 664        if (likely(num_descs) &&
 665            ioat2_check_space_lock(ioat, num_descs+1) == 0)
 666                idx = ioat->head;
 667        else
 668                return NULL;
 669        i = 0;
 670        do {
 671                struct ioat_raw_descriptor *descs[2];
 672                size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 673
 674                desc = ioat2_get_ring_ent(ioat, idx + i);
 675                pq = desc->pq;
 676
 677                /* save a branch by unconditionally retrieving the
  678                 * extended descriptor; pq_set_src() knows not to write
 679                 * to it in the single descriptor case
 680                 */
 681                ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
 682                pq_ex = ext->pq_ex;
 683
 684                descs[0] = (struct ioat_raw_descriptor *) pq;
 685                descs[1] = (struct ioat_raw_descriptor *) pq_ex;
 686
 687                for (s = 0; s < src_cnt; s++)
 688                        pq_set_src(descs, src[s], offset, scf[s], s);
 689
 690                /* see the comment for dma_maxpq in include/linux/dmaengine.h */
 691                if (dmaf_p_disabled_continue(flags))
 692                        pq_set_src(descs, dst[1], offset, 1, s++);
 693                else if (dmaf_continue(flags)) {
 694                        pq_set_src(descs, dst[0], offset, 0, s++);
 695                        pq_set_src(descs, dst[1], offset, 1, s++);
 696                        pq_set_src(descs, dst[1], offset, 0, s++);
 697                }
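                     /* p is the plain xor of every source while q applies the
                      * coefficients, so in the p+q continuation the old q is
                      * listed twice: once with coef 1 to fold it into the new
                      * q, and once with coef 0 to cancel its unwanted xor
                      * contribution to the new p
                      */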
 698                pq->size = xfer_size;
 699                pq->p_addr = dst[0] + offset;
 700                pq->q_addr = dst[1] + offset;
 701                pq->ctl = 0;
 702                pq->ctl_f.op = op;
 703                pq->ctl_f.src_cnt = src_cnt_to_hw(s);
 704                pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
 705                pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
 706
 707                len -= xfer_size;
 708                offset += xfer_size;
 709        } while ((i += 1 + with_ext) < num_descs);
 710
 711        /* last pq descriptor carries the unmap parameters and fence bit */
 712        desc->txd.flags = flags;
 713        desc->len = total_len;
 714        if (result)
 715                desc->result = result;
 716        pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 717        dump_pq_desc_dbg(ioat, desc, ext);
 718
 719        /* completion descriptor carries interrupt bit */
 720        compl_desc = ioat2_get_ring_ent(ioat, idx + i);
 721        compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
 722        hw = compl_desc->hw;
 723        hw->ctl = 0;
 724        hw->ctl_f.null = 1;
 725        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 726        hw->ctl_f.compl_write = 1;
 727        hw->size = NULL_DESC_BUFFER_SIZE;
 728        dump_desc_dbg(ioat, compl_desc);
 729
 730        /* we leave the channel locked to ensure in order submission */
 731        return &compl_desc->txd;
 732}
 733
 734static struct dma_async_tx_descriptor *
 735ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 736              unsigned int src_cnt, const unsigned char *scf, size_t len,
 737              unsigned long flags)
 738{
 739        /* specify valid address for disabled result */
 740        if (flags & DMA_PREP_PQ_DISABLE_P)
 741                dst[0] = dst[1];
 742        if (flags & DMA_PREP_PQ_DISABLE_Q)
 743                dst[1] = dst[0];
 744
 745        /* handle the single source multiply case from the raid6
 746         * recovery path
 747         */
 748        if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
 749                dma_addr_t single_source[2];
 750                unsigned char single_source_coef[2];
 751
 752                BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
 753                single_source[0] = src[0];
 754                single_source[1] = src[0];
 755                single_source_coef[0] = scf[0];
 756                single_source_coef[1] = 0;
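                     /* the engine needs two sources, so the single source is
                      * duplicated; coef 0 keeps the duplicate from altering q
                      */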
 757
 758                return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
 759                                            single_source_coef, len, flags);
 760        } else
 761                return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
 762                                            len, flags);
 763}
 764
 765struct dma_async_tx_descriptor *
 766ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 767                  unsigned int src_cnt, const unsigned char *scf, size_t len,
 768                  enum sum_check_flags *pqres, unsigned long flags)
 769{
 770        /* specify valid address for disabled result */
 771        if (flags & DMA_PREP_PQ_DISABLE_P)
 772                pq[0] = pq[1];
 773        if (flags & DMA_PREP_PQ_DISABLE_Q)
 774                pq[1] = pq[0];
 775
  776        /* the cleanup routine only sets bits on validate failure; it
 777         * does not clear bits on validate success... so clear it here
 778         */
 779        *pqres = 0;
 780
 781        return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
 782                                    flags);
 783}
 784
 785static struct dma_async_tx_descriptor *
 786ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 787                 unsigned int src_cnt, size_t len, unsigned long flags)
 788{
 789        unsigned char scf[src_cnt];
 790        dma_addr_t pq[2];
 791
 792        memset(scf, 0, src_cnt);
 793        pq[0] = dst;
 794        flags |= DMA_PREP_PQ_DISABLE_Q;
 795        pq[1] = dst; /* specify valid address for disabled result */
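             /* with q disabled, p receives the plain xor of the sources; the
              * zeroed coefficients are unused
              */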
 796
 797        return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
 798                                    flags);
 799}
 800
 801struct dma_async_tx_descriptor *
 802ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
 803                     unsigned int src_cnt, size_t len,
 804                     enum sum_check_flags *result, unsigned long flags)
 805{
 806        unsigned char scf[src_cnt];
 807        dma_addr_t pq[2];
 808
  809        /* the cleanup routine only sets bits on validate failure; it
 810         * does not clear bits on validate success... so clear it here
 811         */
 812        *result = 0;
 813
 814        memset(scf, 0, src_cnt);
 815        pq[0] = src[0];
 816        flags |= DMA_PREP_PQ_DISABLE_Q;
 817        pq[1] = pq[0]; /* specify valid address for disabled result */
 818
 819        return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
 820                                    len, flags);
 821}
 822
 823static struct dma_async_tx_descriptor *
 824ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
 825{
 826        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 827        struct ioat_ring_ent *desc;
 828        struct ioat_dma_descriptor *hw;
 829
 830        if (ioat2_check_space_lock(ioat, 1) == 0)
 831                desc = ioat2_get_ring_ent(ioat, ioat->head);
 832        else
 833                return NULL;
 834
 835        hw = desc->hw;
 836        hw->ctl = 0;
 837        hw->ctl_f.null = 1;
 838        hw->ctl_f.int_en = 1;
 839        hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 840        hw->ctl_f.compl_write = 1;
 841        hw->size = NULL_DESC_BUFFER_SIZE;
 842        hw->src_addr = 0;
 843        hw->dst_addr = 0;
 844
 845        desc->txd.flags = flags;
 846        desc->len = 1;
 847
 848        dump_desc_dbg(ioat, desc);
 849
 850        /* we leave the channel locked to ensure in order submission */
 851        return &desc->txd;
 852}
 853
 854static void ioat3_dma_test_callback(void *dma_async_param)
 855{
 856        struct completion *cmp = dma_async_param;
 857
 858        complete(cmp);
 859}
 860
 861#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
 862static int ioat_xor_val_self_test(struct ioatdma_device *device)
 863{
 864        int i, src_idx;
 865        struct page *dest;
 866        struct page *xor_srcs[IOAT_NUM_SRC_TEST];
 867        struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
 868        dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
 869        dma_addr_t dma_addr, dest_dma;
 870        struct dma_async_tx_descriptor *tx;
 871        struct dma_chan *dma_chan;
 872        dma_cookie_t cookie;
 873        u8 cmp_byte = 0;
 874        u32 cmp_word;
 875        u32 xor_val_result;
 876        int err = 0;
 877        struct completion cmp;
 878        unsigned long tmo;
 879        struct device *dev = &device->pdev->dev;
 880        struct dma_device *dma = &device->common;
 881        u8 op = 0;
 882
 883        dev_dbg(dev, "%s\n", __func__);
 884
 885        if (!dma_has_cap(DMA_XOR, dma->cap_mask))
 886                return 0;
 887
 888        for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
 889                xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
 890                if (!xor_srcs[src_idx]) {
 891                        while (src_idx--)
 892                                __free_page(xor_srcs[src_idx]);
 893                        return -ENOMEM;
 894                }
 895        }
 896
 897        dest = alloc_page(GFP_KERNEL);
 898        if (!dest) {
 899                while (src_idx--)
 900                        __free_page(xor_srcs[src_idx]);
 901                return -ENOMEM;
 902        }
 903
 904        /* Fill in src buffers */
 905        for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
 906                u8 *ptr = page_address(xor_srcs[src_idx]);
 907                for (i = 0; i < PAGE_SIZE; i++)
 908                        ptr[i] = (1 << src_idx);
 909        }
 910
 911        for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
 912                cmp_byte ^= (u8) (1 << src_idx);
 913
 914        cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
 915                        (cmp_byte << 8) | cmp_byte;
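             /* each source page holds a distinct single-bit pattern, so every
              * byte of a correct xor result equals cmp_byte and every word
              * equals cmp_word
              */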
 916
 917        memset(page_address(dest), 0, PAGE_SIZE);
 918
 919        dma_chan = container_of(dma->channels.next, struct dma_chan,
 920                                device_node);
 921        if (dma->device_alloc_chan_resources(dma_chan) < 1) {
 922                err = -ENODEV;
 923                goto out;
 924        }
 925
 926        /* test xor */
 927        op = IOAT_OP_XOR;
 928
 929        dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
 930        for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 931                dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
 932                                           DMA_TO_DEVICE);
 933        tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 934                                      IOAT_NUM_SRC_TEST, PAGE_SIZE,
 935                                      DMA_PREP_INTERRUPT |
 936                                      DMA_COMPL_SKIP_SRC_UNMAP |
 937                                      DMA_COMPL_SKIP_DEST_UNMAP);
 938
 939        if (!tx) {
 940                dev_err(dev, "Self-test xor prep failed\n");
 941                err = -ENODEV;
 942                goto dma_unmap;
 943        }
 944
 945        async_tx_ack(tx);
 946        init_completion(&cmp);
 947        tx->callback = ioat3_dma_test_callback;
 948        tx->callback_param = &cmp;
 949        cookie = tx->tx_submit(tx);
 950        if (cookie < 0) {
 951                dev_err(dev, "Self-test xor setup failed\n");
 952                err = -ENODEV;
 953                goto dma_unmap;
 954        }
 955        dma->device_issue_pending(dma_chan);
 956
 957        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 958
 959        if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
 960                dev_err(dev, "Self-test xor timed out\n");
 961                err = -ENODEV;
 962                goto dma_unmap;
 963        }
 964
 965        dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 966        for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 967                dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
 968
 969        dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 970        for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
 971                u32 *ptr = page_address(dest);
 972                if (ptr[i] != cmp_word) {
 973                        dev_err(dev, "Self-test xor failed compare\n");
 974                        err = -ENODEV;
 975                        goto free_resources;
 976                }
 977        }
 978        dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 979
 980        /* skip validate if the capability is not present */
 981        if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
 982                goto free_resources;
 983
 984        op = IOAT_OP_XOR_VAL;
 985
  986        /* validate the sources with the destination page */
 987        for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 988                xor_val_srcs[i] = xor_srcs[i];
 989        xor_val_srcs[i] = dest;
 990
 991        xor_val_result = 1;
 992
 993        for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
 994                dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
 995                                           DMA_TO_DEVICE);
 996        tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 997                                          IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
 998                                          &xor_val_result, DMA_PREP_INTERRUPT |
 999                                          DMA_COMPL_SKIP_SRC_UNMAP |
1000                                          DMA_COMPL_SKIP_DEST_UNMAP);
1001        if (!tx) {
1002                dev_err(dev, "Self-test zero prep failed\n");
1003                err = -ENODEV;
1004                goto dma_unmap;
1005        }
1006
1007        async_tx_ack(tx);
1008        init_completion(&cmp);
1009        tx->callback = ioat3_dma_test_callback;
1010        tx->callback_param = &cmp;
1011        cookie = tx->tx_submit(tx);
1012        if (cookie < 0) {
1013                dev_err(dev, "Self-test zero setup failed\n");
1014                err = -ENODEV;
1015                goto dma_unmap;
1016        }
1017        dma->device_issue_pending(dma_chan);
1018
1019        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1020
1021        if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1022                dev_err(dev, "Self-test validate timed out\n");
1023                err = -ENODEV;
1024                goto dma_unmap;
1025        }
1026
1027        for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1028                dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1029
1030        if (xor_val_result != 0) {
1031                dev_err(dev, "Self-test validate failed compare\n");
1032                err = -ENODEV;
1033                goto free_resources;
1034        }
1035
1036        /* skip memset if the capability is not present */
1037        if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1038                goto free_resources;
1039
1040        /* test memset */
1041        op = IOAT_OP_FILL;
1042
1043        dma_addr = dma_map_page(dev, dest, 0,
1044                        PAGE_SIZE, DMA_FROM_DEVICE);
1045        tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1046                                         DMA_PREP_INTERRUPT |
1047                                         DMA_COMPL_SKIP_SRC_UNMAP |
1048                                         DMA_COMPL_SKIP_DEST_UNMAP);
1049        if (!tx) {
1050                dev_err(dev, "Self-test memset prep failed\n");
1051                err = -ENODEV;
1052                goto dma_unmap;
1053        }
1054
1055        async_tx_ack(tx);
1056        init_completion(&cmp);
1057        tx->callback = ioat3_dma_test_callback;
1058        tx->callback_param = &cmp;
1059        cookie = tx->tx_submit(tx);
1060        if (cookie < 0) {
1061                dev_err(dev, "Self-test memset setup failed\n");
1062                err = -ENODEV;
1063                goto dma_unmap;
1064        }
1065        dma->device_issue_pending(dma_chan);
1066
1067        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1068
1069        if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1070                dev_err(dev, "Self-test memset timed out\n");
1071                err = -ENODEV;
1072                goto dma_unmap;
1073        }
1074
1075        dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
1076
1077        for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1078                u32 *ptr = page_address(dest);
1079                if (ptr[i]) {
1080                        dev_err(dev, "Self-test memset failed compare\n");
1081                        err = -ENODEV;
1082                        goto free_resources;
1083                }
1084        }
1085
1086        /* test for non-zero parity sum */
1087        op = IOAT_OP_XOR_VAL;
1088
1089        xor_val_result = 0;
1090        for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1091                dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1092                                           DMA_TO_DEVICE);
1093        tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1094                                          IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1095                                          &xor_val_result, DMA_PREP_INTERRUPT |
1096                                          DMA_COMPL_SKIP_SRC_UNMAP |
1097                                          DMA_COMPL_SKIP_DEST_UNMAP);
1098        if (!tx) {
1099                dev_err(dev, "Self-test 2nd zero prep failed\n");
1100                err = -ENODEV;
1101                goto dma_unmap;
1102        }
1103
1104        async_tx_ack(tx);
1105        init_completion(&cmp);
1106        tx->callback = ioat3_dma_test_callback;
1107        tx->callback_param = &cmp;
1108        cookie = tx->tx_submit(tx);
1109        if (cookie < 0) {
 1110                dev_err(dev, "Self-test 2nd zero setup failed\n");
1111                err = -ENODEV;
1112                goto dma_unmap;
1113        }
1114        dma->device_issue_pending(dma_chan);
1115
1116        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1117
1118        if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1119                dev_err(dev, "Self-test 2nd validate timed out\n");
1120                err = -ENODEV;
1121                goto dma_unmap;
1122        }
1123
1124        if (xor_val_result != SUM_CHECK_P_RESULT) {
1125                dev_err(dev, "Self-test validate failed compare\n");
1126                err = -ENODEV;
1127                goto dma_unmap;
1128        }
1129
1130        for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1131                dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1132
1133        goto free_resources;
1134dma_unmap:
1135        if (op == IOAT_OP_XOR) {
1136                dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1137                for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1138                        dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
1139                                       DMA_TO_DEVICE);
1140        } else if (op == IOAT_OP_XOR_VAL) {
1141                for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1142                        dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
1143                                       DMA_TO_DEVICE);
1144        } else if (op == IOAT_OP_FILL)
1145                dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
1146free_resources:
1147        dma->device_free_chan_resources(dma_chan);
1148out:
1149        src_idx = IOAT_NUM_SRC_TEST;
1150        while (src_idx--)
1151                __free_page(xor_srcs[src_idx]);
1152        __free_page(dest);
1153        return err;
1154}
1155
1156static int ioat3_dma_self_test(struct ioatdma_device *device)
1157{
1158        int rc = ioat_dma_self_test(device);
1159
1160        if (rc)
1161                return rc;
1162
1163        rc = ioat_xor_val_self_test(device);
1164        if (rc)
1165                return rc;
1166
1167        return 0;
1168}
1169
1170static int ioat3_reset_hw(struct ioat_chan_common *chan)
1171{
1172        /* throw away whatever the channel was doing and get it
1173         * initialized, with ioat3 specific workarounds
1174         */
1175        struct ioatdma_device *device = chan->device;
1176        struct pci_dev *pdev = device->pdev;
1177        u32 chanerr;
1178        u16 dev_id;
1179        int err;
1180
1181        ioat2_quiesce(chan, msecs_to_jiffies(100));
1182
1183        chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
1184        writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
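             /* writing back the value read clears any latched error bits
              * (CHANERR is write-to-clear)
              */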
1185
1186        /* clear any pending errors */
1187        err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
1188        if (err) {
1189                dev_err(&pdev->dev, "channel error register unreachable\n");
1190                return err;
1191        }
1192        pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
1193
1194        /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1195         * (workaround for spurious config parity error after restart)
1196         */
1197        pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1198        if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
1199                pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
1200
1201        return ioat2_reset_sync(chan, msecs_to_jiffies(200));
1202}
1203
1204static bool is_jf_ioat(struct pci_dev *pdev)
1205{
1206        switch (pdev->device) {
1207        case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
1208        case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
1209        case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
1210        case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
1211        case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
1212        case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
1213        case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
1214        case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
1215        case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
1216        case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
1217                return true;
1218        default:
1219                return false;
1220        }
1221}
1222
1223static bool is_snb_ioat(struct pci_dev *pdev)
1224{
1225        switch (pdev->device) {
1226        case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
1227        case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
1228        case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
1229        case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
1230        case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
1231        case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
1232        case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
1233        case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
1234        case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
1235        case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
1236                return true;
1237        default:
1238                return false;
1239        }
1240}
1241
1242static bool is_ivb_ioat(struct pci_dev *pdev)
1243{
1244        switch (pdev->device) {
1245        case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
1246        case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
1247        case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
1248        case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
1249        case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
1250        case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
1251        case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
1252        case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
1253        case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
1254        case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
1255                return true;
1256        default:
1257                return false;
1258        }
1259
1260}
1261
1262int ioat3_dma_probe(struct ioatdma_device *device, int dca)
1263{
1264        struct pci_dev *pdev = device->pdev;
1265        int dca_en = system_has_dca_enabled(pdev);
1266        struct dma_device *dma;
1267        struct dma_chan *c;
1268        struct ioat_chan_common *chan;
1269        bool is_raid_device = false;
1270        int err;
1271        u32 cap;
1272
1273        device->enumerate_channels = ioat2_enumerate_channels;
1274        device->reset_hw = ioat3_reset_hw;
1275        device->self_test = ioat3_dma_self_test;
1276        dma = &device->common;
1277        dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1278        dma->device_issue_pending = ioat2_issue_pending;
1279        dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1280        dma->device_free_chan_resources = ioat2_free_chan_resources;
1281
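             /* copy_align is a log2 value: 6 requests 64-byte aligned
              * memcpy operands on the platforms checked below
              */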
1282        if (is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev))
1283                dma->copy_align = 6;
1284
1285        dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1286        dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1287
1288        cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1289
1290        /* dca is incompatible with raid operations */
1291        if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
1292                cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
1293
1294        if (cap & IOAT_CAP_XOR) {
1295                is_raid_device = true;
1296                dma->max_xor = 8;
1297                dma->xor_align = 6;
1298
1299                dma_cap_set(DMA_XOR, dma->cap_mask);
1300                dma->device_prep_dma_xor = ioat3_prep_xor;
1301
1302                dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1303                dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1304        }
1305        if (cap & IOAT_CAP_PQ) {
1306                is_raid_device = true;
1307                dma_set_maxpq(dma, 8, 0);
1308                dma->pq_align = 6;
1309
1310                dma_cap_set(DMA_PQ, dma->cap_mask);
1311                dma->device_prep_dma_pq = ioat3_prep_pq;
1312
1313                dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1314                dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1315
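                     /* no native xor unit: emulate xor through the pq engine
                      * with q disabled and zeroed coefficients
                      */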
1316                if (!(cap & IOAT_CAP_XOR)) {
1317                        dma->max_xor = 8;
1318                        dma->xor_align = 6;
1319
1320                        dma_cap_set(DMA_XOR, dma->cap_mask);
1321                        dma->device_prep_dma_xor = ioat3_prep_pqxor;
1322
1323                        dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1324                        dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1325                }
1326        }
1327        if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
1328                dma_cap_set(DMA_MEMSET, dma->cap_mask);
1329                dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1330        }
 1331
1333        if (is_raid_device) {
1334                dma->device_tx_status = ioat3_tx_status;
1335                device->cleanup_fn = ioat3_cleanup_event;
1336                device->timer_fn = ioat3_timer_event;
1337        } else {
1338                dma->device_tx_status = ioat_dma_tx_status;
1339                device->cleanup_fn = ioat2_cleanup_event;
1340                device->timer_fn = ioat2_timer_event;
1341        }
1342
1343        #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
1344        dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
1345        dma->device_prep_dma_pq_val = NULL;
1346        #endif
1347
1348        #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
1349        dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
1350        dma->device_prep_dma_xor_val = NULL;
1351        #endif
1352
1353        err = ioat_probe(device);
1354        if (err)
1355                return err;
1356        ioat_set_tcp_copy_break(262144);
1357
1358        list_for_each_entry(c, &dma->channels, device_node) {
1359                chan = to_chan_common(c);
1360                writel(IOAT_DMA_DCA_ANY_CPU,
1361                       chan->reg_base + IOAT_DCACTRL_OFFSET);
1362        }
1363
1364        err = ioat_register(device);
1365        if (err)
1366                return err;
1367
1368        ioat_kobject_add(device, &ioat2_ktype);
1369
1370        if (dca)
1371                device->dca = ioat3_dca_init(pdev, device->reg_base);
1372
1373        return 0;
1374}
1375