linux/drivers/dma/ti/cppi41.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2#include <linux/delay.h>
   3#include <linux/dmaengine.h>
   4#include <linux/dma-mapping.h>
   5#include <linux/platform_device.h>
   6#include <linux/module.h>
   7#include <linux/of.h>
   8#include <linux/slab.h>
   9#include <linux/of_dma.h>
  10#include <linux/of_irq.h>
  11#include <linux/dmapool.h>
  12#include <linux/interrupt.h>
  13#include <linux/of_address.h>
  14#include <linux/pm_runtime.h>
  15#include "../dmaengine.h"
  16
  /*
   * Descriptor word 0: the descriptor type code is placed at bit DESC_TYPE
   * and up.  The type codes are unsigned so that shifting them into bit 31
   * cannot overflow a signed int.
   */
  #define DESC_TYPE       27
  #define DESC_TYPE_HOST  0x10U
  #define DESC_TYPE_TEARD 0x13U

  /* Teardown descriptor word 0: set when the torn-down channel is RX */
  #define TD_DESC_IS_RX   (1 << 16)
  #define TD_DESC_DMA_NUM 10

  /* pd_trans_len() keeps bits 0..DESC_LENGTH_BITS_NUM of a length word */
  #define DESC_LENGTH_BITS_NUM    21

  #define DESC_TYPE_USB   (5 << 26)
  /* Bit 31 needs an unsigned constant: 1 << 31 does not fit in an int */
  #define DESC_PD_COMPLETE        (1U << 31)
  28
  /* DMA engine */
  #define DMA_TDFDQ       4
  /* Per-port global configuration registers, 0x20 bytes apart */
  #define DMA_TXGCR(x)    (0x800 + (x) * 0x20)
  #define DMA_RXGCR(x)    (0x808 + (x) * 0x20)
  /* Offset from an RX GCR to the register that receives the RX queue number */
  #define RXHPCRA0                4

  /* Bit 31 needs an unsigned constant: 1 << 31 does not fit in an int */
  #define GCR_CHAN_ENABLE         (1U << 31)
  #define GCR_TEARDOWN            (1 << 30)
  #define GCR_STARV_RETRY         (1 << 24)
  #define GCR_DESC_TYPE_HOST      (1 << 14)
  39
  /* DMA scheduler */
  #define DMA_SCHED_CTRL          0
  /* Bit 31 needs an unsigned constant: 1 << 31 does not fit in an int */
  #define DMA_SCHED_CTRL_EN       (1U << 31)
  #define DMA_SCHED_WORD(x)       ((x) * 4 + 0x800)

  /*
   * Each scheduler word packs four entries, one per byte: the channel
   * number in the low bits and an RX flag in the top bit of the byte.
   */
  #define SCHED_ENTRY0_CHAN(x)    ((x) << 0)
  #define SCHED_ENTRY0_IS_RX      (1 << 7)

  #define SCHED_ENTRY1_CHAN(x)    ((x) << 8)
  #define SCHED_ENTRY1_IS_RX      (1 << 15)

  #define SCHED_ENTRY2_CHAN(x)    ((x) << 16)
  #define SCHED_ENTRY2_IS_RX      (1 << 23)

  #define SCHED_ENTRY3_CHAN(x)    ((x) << 24)
  #define SCHED_ENTRY3_IS_RX      (1U << 31)
  56
  /* Queue manager */
  /* 4 KiB of memory for descriptors, 2 for each endpoint */
  #define ALLOC_DECS_NUM          128
  #define DESCS_AREAS             1
  #define TOTAL_DESCS_NUM         (ALLOC_DECS_NUM * DESCS_AREAS)
  /* Linking RAM scratch area: 4 bytes per descriptor */
  #define QMGR_SCRATCH_SIZE       (TOTAL_DESCS_NUM * 4)

  #define QMGR_LRAM0_BASE         0x80
  #define QMGR_LRAM_SIZE          0x84
  #define QMGR_LRAM1_BASE         0x88
  #define QMGR_MEMBASE(x)         (0x1000 + (x) * 0x10)
  #define QMGR_MEMCTRL(x)         (0x1004 + (x) * 0x10)
  #define QMGR_MEMCTRL_IDX_SH     16
  #define QMGR_MEMCTRL_DESC_SH    8

  /* Pending registers: one 32-bit word per 32 queues */
  #define QMGR_PEND(x)    (0x90 + (x) * 4)

  /*
   * Pending word index and bit position of queue x.  The argument is
   * parenthesized so that expressions such as (base + n) expand correctly.
   */
  #define QMGR_PENDING_SLOT_Q(x)  ((x) / 32)
  #define QMGR_PENDING_BIT_Q(x)   ((x) % 32)

  /* Per-queue register block, 0x10 bytes per queue */
  #define QMGR_QUEUE_A(n) (0x2000 + (n) * 0x10)
  #define QMGR_QUEUE_B(n) (0x2004 + (n) * 0x10)
  #define QMGR_QUEUE_C(n) (0x2008 + (n) * 0x10)
  #define QMGR_QUEUE_D(n) (0x200c + (n) * 0x10)
  81
  /*
   * Packet Descriptor
   *
   * PD2_ZERO_LENGTH: when set in a completed descriptor's pd2, the
   * interrupt handler treats the transferred length as zero instead of
   * reading it from pd0.
   */
  #define PD2_ZERO_LENGTH         (1 << 19)
  84
  /*
   * Per-channel state.  Two of these exist for every port: one RX and one
   * TX channel (see cppi41_add_chans()).
   */
  struct cppi41_channel {
          struct dma_chan chan;           /* embedded dmaengine channel, see to_cpp41_chan() */
          struct dma_async_tx_descriptor txd;     /* the one descriptor handed out by prep_slave_sg */
          struct cppi41_dd *cdd;          /* owning controller */
          struct cppi41_desc *desc;       /* this channel's slot in cdd->cd[] */
          dma_addr_t desc_phys;           /* DMA address of that slot */
          void __iomem *gcr_reg;          /* TX or RX global configuration register */
          int is_tx;                      /* 1 for a TX channel, 0 for RX */
          u32 residue;                    /* computed on completion, reported by tx_status */

          unsigned int q_num;             /* submit queue, chosen in cpp41_dma_filter_fn() */
          unsigned int q_comp_num;        /* completion queue, chosen in cpp41_dma_filter_fn() */
          unsigned int port_num;          /* port index (channel index / 2) */

          unsigned td_retry;              /* remaining teardown polls, starts at 500 */
          unsigned td_queued:1;           /* teardown descriptor has been queued */
          unsigned td_seen:1;             /* teardown descriptor came back */
          unsigned td_desc_seen:1;        /* transfer descriptor came back during teardown */

          struct list_head node;          /* Node for pending list */
  };
 106
 /*
  * Hardware descriptor: eight 32-bit words, 32-byte aligned.  The field
  * order is the layout shared with the DMA engine and must not change.
  * cppi41_dma_prep_slave_sg() fills the words as noted below.
  */
 struct cppi41_desc {
         u32 pd0;        /* descriptor type | length */
         u32 pd1;        /* written as 0 */
         u32 pd2;        /* DESC_TYPE_USB | completion queue; PD2_ZERO_LENGTH read back */
         u32 pd3;        /* packet size */
         u32 pd4;        /* buffer address (low 32 bits) */
         u32 pd5;        /* written as 0 */
         u32 pd6;        /* DESC_PD_COMPLETE | buffer size */
         u32 pd7;        /* buffer address again (low 32 bits) */
 } __aligned(32);
 117
 /* Queue manager queue numbers used by one port in one direction. */
 struct chan_queues {
         u16 submit;     /* queue that descriptors are pushed to */
         u16 complete;   /* queue that finished descriptors are popped from */
 };
 122
 /* Controller-wide driver state. */
 struct cppi41_dd {
         struct dma_device ddev;         /* dmaengine device; channels hang off ddev.channels */

         void *qmgr_scratch;             /* coherent scratch buffer programmed into QMGR_LRAM0_BASE */
         dma_addr_t scratch_phys;        /* DMA address of qmgr_scratch */

         struct cppi41_desc *cd;         /* coherent pool of ALLOC_DECS_NUM descriptors */
         dma_addr_t descs_phys;          /* DMA address of cd[0] */
         u32 first_td_desc;              /* index in cd[] of the teardown descriptor */
         /* Descriptor slot -> channel with a transfer in flight, NULL when idle */
         struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

         void __iomem *ctrl_mem;         /* DMA controller registers (GCRs, DMA_TDFDQ) */
         void __iomem *sched_mem;        /* DMA scheduler registers */
         void __iomem *qmgr_mem;         /* queue manager registers */
         unsigned int irq;
         const struct chan_queues *queues_rx;    /* per-port RX queue table */
         const struct chan_queues *queues_tx;    /* per-port TX queue table */
         struct chan_queues td_queue;    /* teardown submit/completion queues */
         u16 first_completion_queue;     /* lowest queue number the IRQ handler services */
         u16 qmgr_num_pend;              /* number of QMGR_PEND words the IRQ handler scans */
         u32 n_chans;                    /* number of ports; twice as many channels are created */
         u8 platform;

         struct list_head pending;       /* Pending queued transfers */
         spinlock_t lock;                /* Lock for pending list */

         /* context for suspend/resume */
         unsigned int dma_tdfdq;

         bool is_suspended;              /* while set, issue_pending only queues on ->pending */
 };
 154
 155static struct chan_queues am335x_usb_queues_tx[] = {
 156        /* USB0 ENDP 1 */
 157        [ 0] = { .submit = 32, .complete =  93},
 158        [ 1] = { .submit = 34, .complete =  94},
 159        [ 2] = { .submit = 36, .complete =  95},
 160        [ 3] = { .submit = 38, .complete =  96},
 161        [ 4] = { .submit = 40, .complete =  97},
 162        [ 5] = { .submit = 42, .complete =  98},
 163        [ 6] = { .submit = 44, .complete =  99},
 164        [ 7] = { .submit = 46, .complete = 100},
 165        [ 8] = { .submit = 48, .complete = 101},
 166        [ 9] = { .submit = 50, .complete = 102},
 167        [10] = { .submit = 52, .complete = 103},
 168        [11] = { .submit = 54, .complete = 104},
 169        [12] = { .submit = 56, .complete = 105},
 170        [13] = { .submit = 58, .complete = 106},
 171        [14] = { .submit = 60, .complete = 107},
 172
 173        /* USB1 ENDP1 */
 174        [15] = { .submit = 62, .complete = 125},
 175        [16] = { .submit = 64, .complete = 126},
 176        [17] = { .submit = 66, .complete = 127},
 177        [18] = { .submit = 68, .complete = 128},
 178        [19] = { .submit = 70, .complete = 129},
 179        [20] = { .submit = 72, .complete = 130},
 180        [21] = { .submit = 74, .complete = 131},
 181        [22] = { .submit = 76, .complete = 132},
 182        [23] = { .submit = 78, .complete = 133},
 183        [24] = { .submit = 80, .complete = 134},
 184        [25] = { .submit = 82, .complete = 135},
 185        [26] = { .submit = 84, .complete = 136},
 186        [27] = { .submit = 86, .complete = 137},
 187        [28] = { .submit = 88, .complete = 138},
 188        [29] = { .submit = 90, .complete = 139},
 189};
 190
 191static const struct chan_queues am335x_usb_queues_rx[] = {
 192        /* USB0 ENDP 1 */
 193        [ 0] = { .submit =  1, .complete = 109},
 194        [ 1] = { .submit =  2, .complete = 110},
 195        [ 2] = { .submit =  3, .complete = 111},
 196        [ 3] = { .submit =  4, .complete = 112},
 197        [ 4] = { .submit =  5, .complete = 113},
 198        [ 5] = { .submit =  6, .complete = 114},
 199        [ 6] = { .submit =  7, .complete = 115},
 200        [ 7] = { .submit =  8, .complete = 116},
 201        [ 8] = { .submit =  9, .complete = 117},
 202        [ 9] = { .submit = 10, .complete = 118},
 203        [10] = { .submit = 11, .complete = 119},
 204        [11] = { .submit = 12, .complete = 120},
 205        [12] = { .submit = 13, .complete = 121},
 206        [13] = { .submit = 14, .complete = 122},
 207        [14] = { .submit = 15, .complete = 123},
 208
 209        /* USB1 ENDP 1 */
 210        [15] = { .submit = 16, .complete = 141},
 211        [16] = { .submit = 17, .complete = 142},
 212        [17] = { .submit = 18, .complete = 143},
 213        [18] = { .submit = 19, .complete = 144},
 214        [19] = { .submit = 20, .complete = 145},
 215        [20] = { .submit = 21, .complete = 146},
 216        [21] = { .submit = 22, .complete = 147},
 217        [22] = { .submit = 23, .complete = 148},
 218        [23] = { .submit = 24, .complete = 149},
 219        [24] = { .submit = 25, .complete = 150},
 220        [25] = { .submit = 26, .complete = 151},
 221        [26] = { .submit = 27, .complete = 152},
 222        [27] = { .submit = 28, .complete = 153},
 223        [28] = { .submit = 29, .complete = 154},
 224        [29] = { .submit = 30, .complete = 155},
 225};
 226
 227static const struct chan_queues da8xx_usb_queues_tx[] = {
 228        [0] = { .submit =  16, .complete = 24},
 229        [1] = { .submit =  18, .complete = 24},
 230        [2] = { .submit =  20, .complete = 24},
 231        [3] = { .submit =  22, .complete = 24},
 232};
 233
 234static const struct chan_queues da8xx_usb_queues_rx[] = {
 235        [0] = { .submit =  1, .complete = 26},
 236        [1] = { .submit =  3, .complete = 26},
 237        [2] = { .submit =  5, .complete = 26},
 238        [3] = { .submit =  7, .complete = 26},
 239};
 240
 /* Per-SoC parameters; the fields mirror those of the same name in struct cppi41_dd. */
 struct cppi_glue_infos {
         const struct chan_queues *queues_rx;    /* per-port RX queue table */
         const struct chan_queues *queues_tx;    /* per-port TX queue table */
         struct chan_queues td_queue;            /* teardown submit/completion queues */
         u16 first_completion_queue;             /* lowest completion queue number */
         u16 qmgr_num_pend;                      /* number of pending words to scan */
 };
 248
 249static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
 250{
 251        return container_of(c, struct cppi41_channel, chan);
 252}
 253
 254static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
 255{
 256        struct cppi41_channel *c;
 257        u32 descs_size;
 258        u32 desc_num;
 259
 260        descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;
 261
 262        if (!((desc >= cdd->descs_phys) &&
 263                        (desc < (cdd->descs_phys + descs_size)))) {
 264                return NULL;
 265        }
 266
 267        desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
 268        BUG_ON(desc_num >= ALLOC_DECS_NUM);
 269        c = cdd->chan_busy[desc_num];
 270        cdd->chan_busy[desc_num] = NULL;
 271
 272        /* Usecount for chan_busy[], paired with push_desc_queue() */
 273        pm_runtime_put(cdd->ddev.dev);
 274
 275        return c;
 276}
 277
 278static void cppi_writel(u32 val, void *__iomem *mem)
 279{
 280        __raw_writel(val, mem);
 281}
 282
 283static u32 cppi_readl(void *__iomem *mem)
 284{
 285        return __raw_readl(mem);
 286}
 287
 288static u32 pd_trans_len(u32 val)
 289{
 290        return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
 291}
 292
 293static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
 294{
 295        u32 desc;
 296
 297        desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
 298        desc &= ~0x1f;
 299        return desc;
 300}
 301
 302static irqreturn_t cppi41_irq(int irq, void *data)
 303{
 304        struct cppi41_dd *cdd = data;
 305        u16 first_completion_queue = cdd->first_completion_queue;
 306        u16 qmgr_num_pend = cdd->qmgr_num_pend;
 307        struct cppi41_channel *c;
 308        int i;
 309
 310        for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
 311                        i++) {
 312                u32 val;
 313                u32 q_num;
 314
 315                val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
 316                if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
 317                        u32 mask;
 318                        /* set corresponding bit for completetion Q 93 */
 319                        mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
 320                        /* not set all bits for queues less than Q 93 */
 321                        mask--;
 322                        /* now invert and keep only Q 93+ set */
 323                        val &= ~mask;
 324                }
 325
 326                if (val)
 327                        __iormb();
 328
 329                while (val) {
 330                        u32 desc, len;
 331
 332                        /*
 333                         * This should never trigger, see the comments in
 334                         * push_desc_queue()
 335                         */
 336                        WARN_ON(cdd->is_suspended);
 337
 338                        q_num = __fls(val);
 339                        val &= ~(1 << q_num);
 340                        q_num += 32 * i;
 341                        desc = cppi41_pop_desc(cdd, q_num);
 342                        c = desc_to_chan(cdd, desc);
 343                        if (WARN_ON(!c)) {
 344                                pr_err("%s() q %d desc %08x\n", __func__,
 345                                                q_num, desc);
 346                                continue;
 347                        }
 348
 349                        if (c->desc->pd2 & PD2_ZERO_LENGTH)
 350                                len = 0;
 351                        else
 352                                len = pd_trans_len(c->desc->pd0);
 353
 354                        c->residue = pd_trans_len(c->desc->pd6) - len;
 355                        dma_cookie_complete(&c->txd);
 356                        dmaengine_desc_get_callback_invoke(&c->txd, NULL);
 357                }
 358        }
 359        return IRQ_HANDLED;
 360}
 361
 362static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
 363{
 364        dma_cookie_t cookie;
 365
 366        cookie = dma_cookie_assign(tx);
 367
 368        return cookie;
 369}
 370
 371static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
 372{
 373        struct cppi41_channel *c = to_cpp41_chan(chan);
 374        struct cppi41_dd *cdd = c->cdd;
 375        int error;
 376
 377        error = pm_runtime_get_sync(cdd->ddev.dev);
 378        if (error < 0) {
 379                dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
 380                        __func__, error);
 381                pm_runtime_put_noidle(cdd->ddev.dev);
 382
 383                return error;
 384        }
 385
 386        dma_cookie_init(chan);
 387        dma_async_tx_descriptor_init(&c->txd, chan);
 388        c->txd.tx_submit = cppi41_tx_submit;
 389
 390        if (!c->is_tx)
 391                cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
 392
 393        pm_runtime_mark_last_busy(cdd->ddev.dev);
 394        pm_runtime_put_autosuspend(cdd->ddev.dev);
 395
 396        return 0;
 397}
 398
 399static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
 400{
 401        struct cppi41_channel *c = to_cpp41_chan(chan);
 402        struct cppi41_dd *cdd = c->cdd;
 403        int error;
 404
 405        error = pm_runtime_get_sync(cdd->ddev.dev);
 406        if (error < 0) {
 407                pm_runtime_put_noidle(cdd->ddev.dev);
 408
 409                return;
 410        }
 411
 412        WARN_ON(!list_empty(&cdd->pending));
 413
 414        pm_runtime_mark_last_busy(cdd->ddev.dev);
 415        pm_runtime_put_autosuspend(cdd->ddev.dev);
 416}
 417
 418static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
 419        dma_cookie_t cookie, struct dma_tx_state *txstate)
 420{
 421        struct cppi41_channel *c = to_cpp41_chan(chan);
 422        enum dma_status ret;
 423
 424        ret = dma_cookie_status(chan, cookie, txstate);
 425
 426        dma_set_residue(txstate, c->residue);
 427
 428        return ret;
 429}
 430
 431static void push_desc_queue(struct cppi41_channel *c)
 432{
 433        struct cppi41_dd *cdd = c->cdd;
 434        u32 desc_num;
 435        u32 desc_phys;
 436        u32 reg;
 437
 438        c->residue = 0;
 439
 440        reg = GCR_CHAN_ENABLE;
 441        if (!c->is_tx) {
 442                reg |= GCR_STARV_RETRY;
 443                reg |= GCR_DESC_TYPE_HOST;
 444                reg |= c->q_comp_num;
 445        }
 446
 447        cppi_writel(reg, c->gcr_reg);
 448
 449        /*
 450         * We don't use writel() but __raw_writel() so we have to make sure
 451         * that the DMA descriptor in coherent memory made to the main memory
 452         * before starting the dma engine.
 453         */
 454        __iowmb();
 455
 456        /*
 457         * DMA transfers can take at least 200ms to complete with USB mass
 458         * storage connected. To prevent autosuspend timeouts, we must use
 459         * pm_runtime_get/put() when chan_busy[] is modified. This will get
 460         * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
 461         * outcome of the transfer.
 462         */
 463        pm_runtime_get(cdd->ddev.dev);
 464
 465        desc_phys = lower_32_bits(c->desc_phys);
 466        desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
 467        WARN_ON(cdd->chan_busy[desc_num]);
 468        cdd->chan_busy[desc_num] = c;
 469
 470        reg = (sizeof(struct cppi41_desc) - 24) / 4;
 471        reg |= desc_phys;
 472        cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
 473}
 474
 475/*
 476 * Caller must hold cdd->lock to prevent push_desc_queue()
 477 * getting called out of order. We have both cppi41_dma_issue_pending()
 478 * and cppi41_runtime_resume() call this function.
 479 */
 480static void cppi41_run_queue(struct cppi41_dd *cdd)
 481{
 482        struct cppi41_channel *c, *_c;
 483
 484        list_for_each_entry_safe(c, _c, &cdd->pending, node) {
 485                push_desc_queue(c);
 486                list_del(&c->node);
 487        }
 488}
 489
 490static void cppi41_dma_issue_pending(struct dma_chan *chan)
 491{
 492        struct cppi41_channel *c = to_cpp41_chan(chan);
 493        struct cppi41_dd *cdd = c->cdd;
 494        unsigned long flags;
 495        int error;
 496
 497        error = pm_runtime_get(cdd->ddev.dev);
 498        if ((error != -EINPROGRESS) && error < 0) {
 499                pm_runtime_put_noidle(cdd->ddev.dev);
 500                dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
 501                        error);
 502
 503                return;
 504        }
 505
 506        spin_lock_irqsave(&cdd->lock, flags);
 507        list_add_tail(&c->node, &cdd->pending);
 508        if (!cdd->is_suspended)
 509                cppi41_run_queue(cdd);
 510        spin_unlock_irqrestore(&cdd->lock, flags);
 511
 512        pm_runtime_mark_last_busy(cdd->ddev.dev);
 513        pm_runtime_put_autosuspend(cdd->ddev.dev);
 514}
 515
 516static u32 get_host_pd0(u32 length)
 517{
 518        u32 reg;
 519
 520        reg = DESC_TYPE_HOST << DESC_TYPE;
 521        reg |= length;
 522
 523        return reg;
 524}
 525
 526static u32 get_host_pd1(struct cppi41_channel *c)
 527{
 528        u32 reg;
 529
 530        reg = 0;
 531
 532        return reg;
 533}
 534
 535static u32 get_host_pd2(struct cppi41_channel *c)
 536{
 537        u32 reg;
 538
 539        reg = DESC_TYPE_USB;
 540        reg |= c->q_comp_num;
 541
 542        return reg;
 543}
 544
 545static u32 get_host_pd3(u32 length)
 546{
 547        u32 reg;
 548
 549        /* PD3 = packet size */
 550        reg = length;
 551
 552        return reg;
 553}
 554
 555static u32 get_host_pd6(u32 length)
 556{
 557        u32 reg;
 558
 559        /* PD6 buffer size */
 560        reg = DESC_PD_COMPLETE;
 561        reg |= length;
 562
 563        return reg;
 564}
 565
 566static u32 get_host_pd4_or_7(u32 addr)
 567{
 568        u32 reg;
 569
 570        reg = addr;
 571
 572        return reg;
 573}
 574
 575static u32 get_host_pd5(void)
 576{
 577        u32 reg;
 578
 579        reg = 0;
 580
 581        return reg;
 582}
 583
 584static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
 585        struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
 586        enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
 587{
 588        struct cppi41_channel *c = to_cpp41_chan(chan);
 589        struct cppi41_desc *d;
 590        struct scatterlist *sg;
 591        unsigned int i;
 592
 593        d = c->desc;
 594        for_each_sg(sgl, sg, sg_len, i) {
 595                u32 addr;
 596                u32 len;
 597
 598                /* We need to use more than one desc once musb supports sg */
 599                addr = lower_32_bits(sg_dma_address(sg));
 600                len = sg_dma_len(sg);
 601
 602                d->pd0 = get_host_pd0(len);
 603                d->pd1 = get_host_pd1(c);
 604                d->pd2 = get_host_pd2(c);
 605                d->pd3 = get_host_pd3(len);
 606                d->pd4 = get_host_pd4_or_7(addr);
 607                d->pd5 = get_host_pd5();
 608                d->pd6 = get_host_pd6(len);
 609                d->pd7 = get_host_pd4_or_7(addr);
 610
 611                d++;
 612        }
 613
 614        return &c->txd;
 615}
 616
 617static void cppi41_compute_td_desc(struct cppi41_desc *d)
 618{
 619        d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
 620}
 621
 /*
  * Drive one step of a channel teardown.
  *
  * On the first call the teardown descriptor is queued and the channel's
  * GCR is written with GCR_TEARDOWN set; the retry budget is set to 500.
  * Every call then polls the completion queues for the teardown descriptor
  * and for the channel's own transfer descriptor.
  *
  * Returns -EAGAIN (after a 1us delay) while the teardown descriptor has
  * not been seen and retries remain; the caller is expected to call again.
  * Returns 0 once teardown is finished: the channel is disabled, the
  * cookie is completed and the client callback is invoked with
  * DMA_TRANS_ABORTED.
  */
 static int cppi41_tear_down_chan(struct cppi41_channel *c)
 {
         struct dmaengine_result abort_result;
         struct cppi41_dd *cdd = c->cdd;
         struct cppi41_desc *td;
         u32 reg;
         u32 desc_phys;
         u32 td_desc_phys;

         /* CPU and DMA address of the shared teardown descriptor */
         td = cdd->cd;
         td += cdd->first_td_desc;

         td_desc_phys = cdd->descs_phys;
         td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

         if (!c->td_queued) {
                 cppi41_compute_td_desc(td);
                 __iowmb();

                 /* hand the teardown descriptor to the teardown submit queue */
                 reg = (sizeof(struct cppi41_desc) - 24) / 4;
                 reg |= td_desc_phys;
                 cppi_writel(reg, cdd->qmgr_mem +
                                 QMGR_QUEUE_D(cdd->td_queue.submit));

                 reg = GCR_CHAN_ENABLE;
                 if (!c->is_tx) {
                         reg |= GCR_STARV_RETRY;
                         reg |= GCR_DESC_TYPE_HOST;
                         reg |= cdd->td_queue.complete;
                 }
                 reg |= GCR_TEARDOWN;
                 cppi_writel(reg, c->gcr_reg);
                 c->td_queued = 1;
                 c->td_retry = 500;
         }

         if (!c->td_seen || !c->td_desc_seen) {

                 /* TX falls back to the channel's own completion queue */
                 desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
                 if (!desc_phys && c->is_tx)
                         desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);

                 if (desc_phys == c->desc_phys) {
                         c->td_desc_seen = 1;

                 } else if (desc_phys == td_desc_phys) {
                         u32 pd0;

                         __iormb();
                         pd0 = td->pd0;
                         /* sanity-check what came back in the teardown descriptor */
                         WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
                         WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
                         WARN_ON((pd0 & 0x1f) != c->port_num);
                         c->td_seen = 1;
                 } else if (desc_phys) {
                         WARN_ON_ONCE(1);
                 }
         }
         c->td_retry--;
         /*
          * If the TX descriptor / channel is in use, the caller needs to poke
          * its TD bit multiple times. After that the hardware releases the
          * transfer descriptor followed by the TD descriptor. Waiting seems
          * not to make any difference.
          * RX seems to be thrown out right away. However, once the TearDown
          * descriptor gets through we are done. If we have not seen the
          * transfer descriptor by the time the TD arrives, we fetch it from
          * the queues below; it has to be there waiting for us.
          */
         if (!c->td_seen && c->td_retry) {
                 udelay(1);
                 return -EAGAIN;
         }
         WARN_ON(!c->td_retry);

         if (!c->td_desc_seen) {
                 /* try the submit queue first, then the completion queue */
                 desc_phys = cppi41_pop_desc(cdd, c->q_num);
                 if (!desc_phys)
                         desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
                 WARN_ON(!desc_phys);
         }

         /* reset the teardown state and disable the channel */
         c->td_queued = 0;
         c->td_seen = 0;
         c->td_desc_seen = 0;
         cppi_writel(0, c->gcr_reg);

         /* Invoke the callback to do the necessary clean-up */
         abort_result.result = DMA_TRANS_ABORTED;
         dma_cookie_complete(&c->txd);
         dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);

         return 0;
 }
 716
 717static int cppi41_stop_chan(struct dma_chan *chan)
 718{
 719        struct cppi41_channel *c = to_cpp41_chan(chan);
 720        struct cppi41_dd *cdd = c->cdd;
 721        u32 desc_num;
 722        u32 desc_phys;
 723        int ret;
 724
 725        desc_phys = lower_32_bits(c->desc_phys);
 726        desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
 727        if (!cdd->chan_busy[desc_num]) {
 728                struct cppi41_channel *cc, *_ct;
 729
 730                /*
 731                 * channels might still be in the pendling list if
 732                 * cppi41_dma_issue_pending() is called after
 733                 * cppi41_runtime_suspend() is called
 734                 */
 735                list_for_each_entry_safe(cc, _ct, &cdd->pending, node) {
 736                        if (cc != c)
 737                                continue;
 738                        list_del(&cc->node);
 739                        break;
 740                }
 741                return 0;
 742        }
 743
 744        ret = cppi41_tear_down_chan(c);
 745        if (ret)
 746                return ret;
 747
 748        WARN_ON(!cdd->chan_busy[desc_num]);
 749        cdd->chan_busy[desc_num] = NULL;
 750
 751        /* Usecount for chan_busy[], paired with push_desc_queue() */
 752        pm_runtime_put(cdd->ddev.dev);
 753
 754        return 0;
 755}
 756
 757static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 758{
 759        struct cppi41_channel *cchan, *chans;
 760        int i;
 761        u32 n_chans = cdd->n_chans;
 762
 763        /*
 764         * The channels can only be used as TX or as RX. So we add twice
 765         * that much dma channels because USB can only do RX or TX.
 766         */
 767        n_chans *= 2;
 768
 769        chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
 770        if (!chans)
 771                return -ENOMEM;
 772
 773        for (i = 0; i < n_chans; i++) {
 774                cchan = &chans[i];
 775
 776                cchan->cdd = cdd;
 777                if (i & 1) {
 778                        cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
 779                        cchan->is_tx = 1;
 780                } else {
 781                        cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
 782                        cchan->is_tx = 0;
 783                }
 784                cchan->port_num = i >> 1;
 785                cchan->desc = &cdd->cd[i];
 786                cchan->desc_phys = cdd->descs_phys;
 787                cchan->desc_phys += i * sizeof(struct cppi41_desc);
 788                cchan->chan.device = &cdd->ddev;
 789                list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
 790        }
 791        cdd->first_td_desc = n_chans;
 792
 793        return 0;
 794}
 795
 796static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
 797{
 798        unsigned int mem_decs;
 799        int i;
 800
 801        mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);
 802
 803        for (i = 0; i < DESCS_AREAS; i++) {
 804
 805                cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
 806                cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));
 807
 808                dma_free_coherent(dev, mem_decs, cdd->cd,
 809                                cdd->descs_phys);
 810        }
 811}
 812
 813static void disable_sched(struct cppi41_dd *cdd)
 814{
 815        cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
 816}
 817
 818static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
 819{
 820        disable_sched(cdd);
 821
 822        purge_descs(dev, cdd);
 823
 824        cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
 825        cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
 826        dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
 827                        cdd->scratch_phys);
 828}
 829
 830static int init_descs(struct device *dev, struct cppi41_dd *cdd)
 831{
 832        unsigned int desc_size;
 833        unsigned int mem_decs;
 834        int i;
 835        u32 reg;
 836        u32 idx;
 837
 838        BUILD_BUG_ON(sizeof(struct cppi41_desc) &
 839                        (sizeof(struct cppi41_desc) - 1));
 840        BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
 841        BUILD_BUG_ON(ALLOC_DECS_NUM < 32);
 842
 843        desc_size = sizeof(struct cppi41_desc);
 844        mem_decs = ALLOC_DECS_NUM * desc_size;
 845
 846        idx = 0;
 847        for (i = 0; i < DESCS_AREAS; i++) {
 848
 849                reg = idx << QMGR_MEMCTRL_IDX_SH;
 850                reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
 851                reg |= ilog2(ALLOC_DECS_NUM) - 5;
 852
 853                BUILD_BUG_ON(DESCS_AREAS != 1);
 854                cdd->cd = dma_alloc_coherent(dev, mem_decs,
 855                                &cdd->descs_phys, GFP_KERNEL);
 856                if (!cdd->cd)
 857                        return -ENOMEM;
 858
 859                cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
 860                cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));
 861
 862                idx += ALLOC_DECS_NUM;
 863        }
 864        return 0;
 865}
 866
 867static void init_sched(struct cppi41_dd *cdd)
 868{
 869        unsigned ch;
 870        unsigned word;
 871        u32 reg;
 872
 873        word = 0;
 874        cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
 875        for (ch = 0; ch < cdd->n_chans; ch += 2) {
 876
 877                reg = SCHED_ENTRY0_CHAN(ch);
 878                reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;
 879
 880                reg |= SCHED_ENTRY2_CHAN(ch + 1);
 881                reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
 882                cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
 883                word++;
 884        }
 885        reg = cdd->n_chans * 2 - 1;
 886        reg |= DMA_SCHED_CTRL_EN;
 887        cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
 888}
 889
 890static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
 891{
 892        int ret;
 893
 894        BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
 895        cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
 896                        &cdd->scratch_phys, GFP_KERNEL);
 897        if (!cdd->qmgr_scratch)
 898                return -ENOMEM;
 899
 900        cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
 901        cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
 902        cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
 903
 904        ret = init_descs(dev, cdd);
 905        if (ret)
 906                goto err_td;
 907
 908        cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
 909        init_sched(cdd);
 910
 911        return 0;
 912err_td:
 913        deinit_cppi41(dev, cdd);
 914        return ret;
 915}
 916
 /* Forward declaration: cpp41_dma_filter_fn() compares a channel's driver against this one. */
 static struct platform_driver cpp41_dma_driver;
 /*
  * The filter parameter is an array of two u32 values, X Y:
  * X: port number (index INFO_PORT)
  * Y: direction (index INFO_IS_TX), 0 = RX, anything else = TX
  */
 #define INFO_PORT       0
 #define INFO_IS_TX      1
 926
 927static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
 928{
 929        struct cppi41_channel *cchan;
 930        struct cppi41_dd *cdd;
 931        const struct chan_queues *queues;
 932        u32 *num = param;
 933
 934        if (chan->device->dev->driver != &cpp41_dma_driver.driver)
 935                return false;
 936
 937        cchan = to_cpp41_chan(chan);
 938
 939        if (cchan->port_num != num[INFO_PORT])
 940                return false;
 941
 942        if (cchan->is_tx && !num[INFO_IS_TX])
 943                return false;
 944        cdd = cchan->cdd;
 945        if (cchan->is_tx)
 946                queues = cdd->queues_tx;
 947        else
 948                queues = cdd->queues_rx;
 949
 950        BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
 951                     ARRAY_SIZE(am335x_usb_queues_tx));
 952        if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx)))
 953                return false;
 954
 955        cchan->q_num = queues[cchan->port_num].submit;
 956        cchan->q_comp_num = queues[cchan->port_num].complete;
 957        return true;
 958}
 959
 960static struct of_dma_filter_info cpp41_dma_info = {
 961        .filter_fn = cpp41_dma_filter_fn,
 962};
 963
 964static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
 965                struct of_dma *ofdma)
 966{
 967        int count = dma_spec->args_count;
 968        struct of_dma_filter_info *info = ofdma->of_dma_data;
 969
 970        if (!info || !info->filter_fn)
 971                return NULL;
 972
 973        if (count != 2)
 974                return NULL;
 975
 976        return dma_request_channel(info->dma_cap, info->filter_fn,
 977                        &dma_spec->args[0]);
 978}
 979
 980static const struct cppi_glue_infos am335x_usb_infos = {
 981        .queues_rx = am335x_usb_queues_rx,
 982        .queues_tx = am335x_usb_queues_tx,
 983        .td_queue = { .submit = 31, .complete = 0 },
 984        .first_completion_queue = 93,
 985        .qmgr_num_pend = 5,
 986};
 987
 988static const struct cppi_glue_infos da8xx_usb_infos = {
 989        .queues_rx = da8xx_usb_queues_rx,
 990        .queues_tx = da8xx_usb_queues_tx,
 991        .td_queue = { .submit = 31, .complete = 0 },
 992        .first_completion_queue = 24,
 993        .qmgr_num_pend = 2,
 994};
 995
 996static const struct of_device_id cppi41_dma_ids[] = {
 997        { .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
 998        { .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
 999        {},
1000};
1001MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
1002
1003static const struct cppi_glue_infos *get_glue_info(struct device *dev)
1004{
1005        const struct of_device_id *of_id;
1006
1007        of_id = of_match_node(cppi41_dma_ids, dev->of_node);
1008        if (!of_id)
1009                return NULL;
1010        return of_id->data;
1011}
1012
/* Bus widths of 1, 2, 3 and 4 bytes, as a dmaengine address-width mask. */
#define CPPI41_DMA_BUSWIDTHS \
	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE)  | BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
	 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
1017
1018static int cppi41_dma_probe(struct platform_device *pdev)
1019{
1020        struct cppi41_dd *cdd;
1021        struct device *dev = &pdev->dev;
1022        const struct cppi_glue_infos *glue_info;
1023        struct resource *mem;
1024        int index;
1025        int irq;
1026        int ret;
1027
1028        glue_info = get_glue_info(dev);
1029        if (!glue_info)
1030                return -EINVAL;
1031
1032        cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
1033        if (!cdd)
1034                return -ENOMEM;
1035
1036        dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
1037        cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
1038        cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
1039        cdd->ddev.device_tx_status = cppi41_dma_tx_status;
1040        cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
1041        cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
1042        cdd->ddev.device_terminate_all = cppi41_stop_chan;
1043        cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
1044        cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
1045        cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
1046        cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
1047        cdd->ddev.dev = dev;
1048        INIT_LIST_HEAD(&cdd->ddev.channels);
1049        cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
1050
1051        index = of_property_match_string(dev->of_node,
1052                                         "reg-names", "controller");
1053        if (index < 0)
1054                return index;
1055
1056        mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
1057        cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
1058        if (IS_ERR(cdd->ctrl_mem))
1059                return PTR_ERR(cdd->ctrl_mem);
1060
1061        mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
1062        cdd->sched_mem = devm_ioremap_resource(dev, mem);
1063        if (IS_ERR(cdd->sched_mem))
1064                return PTR_ERR(cdd->sched_mem);
1065
1066        mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
1067        cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
1068        if (IS_ERR(cdd->qmgr_mem))
1069                return PTR_ERR(cdd->qmgr_mem);
1070
1071        spin_lock_init(&cdd->lock);
1072        INIT_LIST_HEAD(&cdd->pending);
1073
1074        platform_set_drvdata(pdev, cdd);
1075
1076        pm_runtime_enable(dev);
1077        pm_runtime_set_autosuspend_delay(dev, 100);
1078        pm_runtime_use_autosuspend(dev);
1079        ret = pm_runtime_get_sync(dev);
1080        if (ret < 0)
1081                goto err_get_sync;
1082
1083        cdd->queues_rx = glue_info->queues_rx;
1084        cdd->queues_tx = glue_info->queues_tx;
1085        cdd->td_queue = glue_info->td_queue;
1086        cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
1087        cdd->first_completion_queue = glue_info->first_completion_queue;
1088
1089        ret = of_property_read_u32(dev->of_node,
1090                                   "#dma-channels", &cdd->n_chans);
1091        if (ret)
1092                goto err_get_n_chans;
1093
1094        ret = init_cppi41(dev, cdd);
1095        if (ret)
1096                goto err_init_cppi;
1097
1098        ret = cppi41_add_chans(dev, cdd);
1099        if (ret)
1100                goto err_chans;
1101
1102        irq = irq_of_parse_and_map(dev->of_node, 0);
1103        if (!irq) {
1104                ret = -EINVAL;
1105                goto err_chans;
1106        }
1107
1108        ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
1109                        dev_name(dev), cdd);
1110        if (ret)
1111                goto err_chans;
1112        cdd->irq = irq;
1113
1114        ret = dma_async_device_register(&cdd->ddev);
1115        if (ret)
1116                goto err_chans;
1117
1118        ret = of_dma_controller_register(dev->of_node,
1119                        cppi41_dma_xlate, &cpp41_dma_info);
1120        if (ret)
1121                goto err_of;
1122
1123        pm_runtime_mark_last_busy(dev);
1124        pm_runtime_put_autosuspend(dev);
1125
1126        return 0;
1127err_of:
1128        dma_async_device_unregister(&cdd->ddev);
1129err_chans:
1130        deinit_cppi41(dev, cdd);
1131err_init_cppi:
1132        pm_runtime_dont_use_autosuspend(dev);
1133err_get_n_chans:
1134err_get_sync:
1135        pm_runtime_put_sync(dev);
1136        pm_runtime_disable(dev);
1137        return ret;
1138}
1139
1140static int cppi41_dma_remove(struct platform_device *pdev)
1141{
1142        struct cppi41_dd *cdd = platform_get_drvdata(pdev);
1143        int error;
1144
1145        error = pm_runtime_get_sync(&pdev->dev);
1146        if (error < 0)
1147                dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
1148                        __func__, error);
1149        of_dma_controller_free(pdev->dev.of_node);
1150        dma_async_device_unregister(&cdd->ddev);
1151
1152        devm_free_irq(&pdev->dev, cdd->irq, cdd);
1153        deinit_cppi41(&pdev->dev, cdd);
1154        pm_runtime_dont_use_autosuspend(&pdev->dev);
1155        pm_runtime_put_sync(&pdev->dev);
1156        pm_runtime_disable(&pdev->dev);
1157        return 0;
1158}
1159
1160static int __maybe_unused cppi41_suspend(struct device *dev)
1161{
1162        struct cppi41_dd *cdd = dev_get_drvdata(dev);
1163
1164        cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
1165        disable_sched(cdd);
1166
1167        return 0;
1168}
1169
1170static int __maybe_unused cppi41_resume(struct device *dev)
1171{
1172        struct cppi41_dd *cdd = dev_get_drvdata(dev);
1173        struct cppi41_channel *c;
1174        int i;
1175
1176        for (i = 0; i < DESCS_AREAS; i++)
1177                cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
1178
1179        list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
1180                if (!c->is_tx)
1181                        cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
1182
1183        init_sched(cdd);
1184
1185        cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
1186        cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
1187        cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
1188        cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
1189
1190        return 0;
1191}
1192
1193static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
1194{
1195        struct cppi41_dd *cdd = dev_get_drvdata(dev);
1196        unsigned long flags;
1197
1198        spin_lock_irqsave(&cdd->lock, flags);
1199        cdd->is_suspended = true;
1200        WARN_ON(!list_empty(&cdd->pending));
1201        spin_unlock_irqrestore(&cdd->lock, flags);
1202
1203        return 0;
1204}
1205
1206static int __maybe_unused cppi41_runtime_resume(struct device *dev)
1207{
1208        struct cppi41_dd *cdd = dev_get_drvdata(dev);
1209        unsigned long flags;
1210
1211        spin_lock_irqsave(&cdd->lock, flags);
1212        cdd->is_suspended = false;
1213        cppi41_run_queue(cdd);
1214        spin_unlock_irqrestore(&cdd->lock, flags);
1215
1216        return 0;
1217}
1218
1219static const struct dev_pm_ops cppi41_pm_ops = {
1220        SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
1221        SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
1222                           cppi41_runtime_resume,
1223                           NULL)
1224};
1225
1226static struct platform_driver cpp41_dma_driver = {
1227        .probe  = cppi41_dma_probe,
1228        .remove = cppi41_dma_remove,
1229        .driver = {
1230                .name = "cppi41-dma-engine",
1231                .pm = &cppi41_pm_ops,
1232                .of_match_table = of_match_ptr(cppi41_dma_ids),
1233        },
1234};
1235
1236module_platform_driver(cpp41_dma_driver);
1237MODULE_LICENSE("GPL");
1238MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");
1239