linux/arch/arm/mm/cache-l2x0.c
/*
 * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support
 *
 * Copyright (C) 2007 ARM Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>

#include <asm/cacheflush.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/hardware/cache-l2x0.h>
#include "cache-tauros3.h"
#include "cache-aurora-l2.h"

struct l2c_init_data {
        const char *type;
        unsigned way_size_0;
        unsigned num_lock;
        void (*of_parse)(const struct device_node *, u32 *, u32 *);
        void (*enable)(void __iomem *, u32, unsigned);
        void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
        void (*save)(void __iomem *);
        struct outer_cache_fns outer_cache;
};

#define CACHE_LINE_SIZE         32

static void __iomem *l2x0_base;
static DEFINE_RAW_SPINLOCK(l2x0_lock);
static u32 l2x0_way_mask;       /* Bitmask of active ways */
static u32 l2x0_size;
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;

struct l2x0_regs l2x0_saved_regs;

/*
 * Common code for all cache controllers.
 */
static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
{
        /* wait for cache operation by line or way to complete */
        while (readl_relaxed(reg) & mask)
                cpu_relax();
}

/*
 * By default, we write directly to secure registers.  Platforms must
 * override this if they are running non-secure.
 */
static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
{
        if (val == readl_relaxed(base + reg))
                return;
        if (outer_cache.write_sec)
                outer_cache.write_sec(val, reg);
        else
                writel_relaxed(val, base + reg);
}

/*
 * This should only be called when we have a requirement that the
 * register be written due to a work-around, as platforms running
 * in non-secure mode may not be able to access this register.
 */
static inline void l2c_set_debug(void __iomem *base, unsigned long val)
{
        l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
}

static void __l2c_op_way(void __iomem *reg)
{
        writel_relaxed(l2x0_way_mask, reg);
        l2c_wait_mask(reg, l2x0_way_mask);
}

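/*
 * For example (illustrative only), with an 8-way cache l2x0_way_mask is
 * 0xff: writing it to an operate-by-way register starts a background
 * operation on all eight ways, and the wait loop above polls until the
 * hardware has cleared every way bit to signal completion.
 */
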
static inline void l2c_unlock(void __iomem *base, unsigned num)
{
        unsigned i;

        for (i = 0; i < num; i++) {
                writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
                               i * L2X0_LOCKDOWN_STRIDE);
                writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
                               i * L2X0_LOCKDOWN_STRIDE);
        }
}

/*
 * Enable the L2 cache controller.  This function must only be
 * called when the cache controller is known to be disabled.
 */
static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
        unsigned long flags;

        l2c_write_sec(aux, base, L2X0_AUX_CTRL);

        l2c_unlock(base, num_lock);

        local_irq_save(flags);
        __l2c_op_way(base + L2X0_INV_WAY);
        writel_relaxed(0, base + sync_reg_offset);
        l2c_wait_mask(base + sync_reg_offset, 1);
        local_irq_restore(flags);

        l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
}

static void l2c_disable(void)
{
        void __iomem *base = l2x0_base;

        outer_cache.flush_all();
        l2c_write_sec(0, base, L2X0_CTRL);
        dsb(st);
}

#ifdef CONFIG_CACHE_PL310
static inline void cache_wait(void __iomem *reg, unsigned long mask)
{
        /* cache operations by line are atomic on PL310 */
}
#else
#define cache_wait      l2c_wait_mask
#endif

static inline void cache_sync(void)
{
        void __iomem *base = l2x0_base;

        writel_relaxed(0, base + sync_reg_offset);
        cache_wait(base + L2X0_CACHE_SYNC, 1);
}

#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
static inline void debug_writel(unsigned long val)
{
        l2c_set_debug(l2x0_base, val);
}
#else
/* Optimised out for non-errata case */
static inline void debug_writel(unsigned long val)
{
}
#endif

static void l2x0_cache_sync(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        cache_sync();
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void __l2x0_flush_all(void)
{
        debug_writel(0x03);
        __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
        cache_sync();
        debug_writel(0x00);
}

static void l2x0_flush_all(void)
{
        unsigned long flags;

        /* clean all ways */
        raw_spin_lock_irqsave(&l2x0_lock, flags);
        __l2x0_flush_all();
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2x0_disable(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        __l2x0_flush_all();
        l2c_write_sec(0, l2x0_base, L2X0_CTRL);
        dsb(st);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c_save(void __iomem *base)
{
        l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
}

/*
 * L2C-210 specific code.
 *
 * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
 * ensure that no background operation is running.  The way operations
 * are all background tasks.
 *
 * While a background operation is in progress, any new operation is
 * ignored (it is unspecified whether this causes an error.)  Thankfully,
 * this cache controller is not used on SMP.
 *
 * The L2C-210 never has a sync register other than L2X0_CACHE_SYNC, but
 * we use sync_reg_offset here so we can share some of this code with the
 * L2C-310.
 */
static void __l2c210_cache_sync(void __iomem *base)
{
        writel_relaxed(0, base + sync_reg_offset);
}

static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
        unsigned long end)
{
        while (start < end) {
                writel_relaxed(start, reg);
                start += CACHE_LINE_SIZE;
        }
}

static void l2c210_inv_range(unsigned long start, unsigned long end)
{
        void __iomem *base = l2x0_base;

        if (start & (CACHE_LINE_SIZE - 1)) {
                start &= ~(CACHE_LINE_SIZE - 1);
                writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
                start += CACHE_LINE_SIZE;
        }

        if (end & (CACHE_LINE_SIZE - 1)) {
                end &= ~(CACHE_LINE_SIZE - 1);
                writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
        }

        __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
        __l2c210_cache_sync(base);
}

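/*
 * Worked example of the alignment handling in l2c210_inv_range(), with
 * purely illustrative addresses: inv_range(0x1010, 0x1078) first
 * cleans+invalidates the partially covered lines at 0x1000 and 0x1060,
 * then invalidates the fully covered lines at 0x1020 and 0x1040.
 */
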
static void l2c210_clean_range(unsigned long start, unsigned long end)
{
        void __iomem *base = l2x0_base;

        start &= ~(CACHE_LINE_SIZE - 1);
        __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
        __l2c210_cache_sync(base);
}

static void l2c210_flush_range(unsigned long start, unsigned long end)
{
        void __iomem *base = l2x0_base;

        start &= ~(CACHE_LINE_SIZE - 1);
        __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
        __l2c210_cache_sync(base);
}

static void l2c210_flush_all(void)
{
        void __iomem *base = l2x0_base;

        BUG_ON(!irqs_disabled());

        __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
        __l2c210_cache_sync(base);
}

static void l2c210_sync(void)
{
        __l2c210_cache_sync(l2x0_base);
}

static void l2c210_resume(void)
{
        void __iomem *base = l2x0_base;

        if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
                l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
}

static const struct l2c_init_data l2c210_data __initconst = {
        .type = "L2C-210",
        .way_size_0 = SZ_8K,
        .num_lock = 1,
        .enable = l2c_enable,
        .save = l2c_save,
        .outer_cache = {
                .inv_range = l2c210_inv_range,
                .clean_range = l2c210_clean_range,
                .flush_range = l2c210_flush_range,
                .flush_all = l2c210_flush_all,
                .disable = l2c_disable,
                .sync = l2c210_sync,
                .resume = l2c210_resume,
        },
};

/*
 * L2C-220 specific code.
 *
 * All operations are background operations: they have to be waited for.
 * Conflicting requests generate a slave error (which will cause an
 * imprecise abort.)  Never uses sync_reg_offset, so we hard-code the
 * sync register here.
 *
 * However, we can re-use the l2c210_resume call.
 */
static inline void __l2c220_cache_sync(void __iomem *base)
{
        writel_relaxed(0, base + L2X0_CACHE_SYNC);
        l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
}

static void l2c220_op_way(void __iomem *base, unsigned reg)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        __l2c_op_way(base + reg);
        __l2c220_cache_sync(base);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
        unsigned long end, unsigned long flags)
{
        raw_spinlock_t *lock = &l2x0_lock;

        while (start < end) {
                unsigned long blk_end = start + min(end - start, 4096UL);

                while (start < blk_end) {
                        l2c_wait_mask(reg, 1);
                        writel_relaxed(start, reg);
                        start += CACHE_LINE_SIZE;
                }

                if (blk_end < end) {
                        raw_spin_unlock_irqrestore(lock, flags);
                        raw_spin_lock_irqsave(lock, flags);
                }
        }

        return flags;
}

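/*
 * For example, a 10K range is issued as 4K + 4K + 2K blocks; after each
 * full block the lock is dropped and immediately re-taken so that
 * pending interrupts can be serviced, bounding the IRQ-off latency.
 * Callers pass in their irqsave flags and must use the returned value
 * when they finally unlock.
 */
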
static void l2c220_inv_range(unsigned long start, unsigned long end)
{
        void __iomem *base = l2x0_base;
        unsigned long flags;

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        if ((start | end) & (CACHE_LINE_SIZE - 1)) {
                if (start & (CACHE_LINE_SIZE - 1)) {
                        start &= ~(CACHE_LINE_SIZE - 1);
                        writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
                        start += CACHE_LINE_SIZE;
                }

                if (end & (CACHE_LINE_SIZE - 1)) {
                        end &= ~(CACHE_LINE_SIZE - 1);
                        l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
                        writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
                }
        }

        flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
                                   start, end, flags);
        l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
        __l2c220_cache_sync(base);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_clean_range(unsigned long start, unsigned long end)
{
        void __iomem *base = l2x0_base;
        unsigned long flags;

        start &= ~(CACHE_LINE_SIZE - 1);
        if ((end - start) >= l2x0_size) {
                l2c220_op_way(base, L2X0_CLEAN_WAY);
                return;
        }

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
                                   start, end, flags);
        l2c_wait_mask(base + L2X0_CLEAN_LINE_PA, 1);
        __l2c220_cache_sync(base);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_flush_range(unsigned long start, unsigned long end)
{
        void __iomem *base = l2x0_base;
        unsigned long flags;

        start &= ~(CACHE_LINE_SIZE - 1);
        if ((end - start) >= l2x0_size) {
                l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
                return;
        }

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
                                   start, end, flags);
        l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
        __l2c220_cache_sync(base);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_flush_all(void)
{
        l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
}

static void l2c220_sync(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        __l2c220_cache_sync(l2x0_base);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
        /*
         * Always enable non-secure access to the lockdown registers -
         * we write to them as part of the L2C enable sequence so they
         * need to be accessible.
         */
        aux |= L220_AUX_CTRL_NS_LOCKDOWN;

        l2c_enable(base, aux, num_lock);
}

static const struct l2c_init_data l2c220_data __initconst = {
        .type = "L2C-220",
        .way_size_0 = SZ_8K,
        .num_lock = 1,
        .enable = l2c220_enable,
        .save = l2c_save,
        .outer_cache = {
                .inv_range = l2c220_inv_range,
                .clean_range = l2c220_clean_range,
                .flush_range = l2c220_flush_range,
                .flush_all = l2c220_flush_all,
                .disable = l2c_disable,
                .sync = l2c220_sync,
                .resume = l2c210_resume,
        },
};

/*
 * L2C-310 specific code.
 *
 * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
 * and the way operations are all background tasks.  However, issuing an
 * operation while a background operation is in progress results in a
 * SLVERR response.  We can reuse:
 *
 *  __l2c210_cache_sync (using sync_reg_offset)
 *  l2c210_sync
 *  l2c210_inv_range (if 588369 is not applicable)
 *  l2c210_clean_range
 *  l2c210_flush_range (if 588369 is not applicable)
 *  l2c210_flush_all (if 727915 is not applicable)
 *
 * Errata:
 * 588369: PL310 R0P0->R1P0, fixed R2P0.
 *      Affects: all clean+invalidate operations
 *      clean and invalidate skips the invalidate step, so we need to issue
 *      separate operations.  We also require the above debug workaround
 *      enclosing this code fragment on affected parts.  On unaffected parts,
 *      we must not use this workaround without the debug register writes
 *      to avoid exposing a problem similar to 727915.
 *
 * 727915: PL310 R2P0->R3P0, fixed R3P1.
 *      Affects: clean+invalidate by way
 *      clean and invalidate by way runs in the background, and a store can
 *      hit the line between the clean operation and invalidate operation,
 *      resulting in the store being lost.
 *
 * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
 *      Affects: 8x64-bit (double fill) line fetches
 *      double fill line fetches can fail to cause dirty data to be evicted
 *      from the cache before the new data overwrites the second line.
 *
 * 753970: PL310 R3P0, fixed R3P1.
 *      Affects: sync
 *      prevents merging writes after the sync operation, until another L2C
 *      operation is performed (or a number of other conditions.)
 *
 * 769419: PL310 R0P0->R3P1, fixed R3P2.
 *      Affects: store buffer
 *      store buffer is not automatically drained.
 */
static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
{
        void __iomem *base = l2x0_base;

        if ((start | end) & (CACHE_LINE_SIZE - 1)) {
                unsigned long flags;

                /* Erratum 588369 for both clean+invalidate operations */
                raw_spin_lock_irqsave(&l2x0_lock, flags);
                l2c_set_debug(base, 0x03);

                if (start & (CACHE_LINE_SIZE - 1)) {
                        start &= ~(CACHE_LINE_SIZE - 1);
                        writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
                        writel_relaxed(start, base + L2X0_INV_LINE_PA);
                        start += CACHE_LINE_SIZE;
                }

                if (end & (CACHE_LINE_SIZE - 1)) {
                        end &= ~(CACHE_LINE_SIZE - 1);
                        writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
                        writel_relaxed(end, base + L2X0_INV_LINE_PA);
                }

                l2c_set_debug(base, 0x00);
                raw_spin_unlock_irqrestore(&l2x0_lock, flags);
        }

        __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
        __l2c210_cache_sync(base);
}

static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
{
        raw_spinlock_t *lock = &l2x0_lock;
        unsigned long flags;
        void __iomem *base = l2x0_base;

        raw_spin_lock_irqsave(lock, flags);
        while (start < end) {
                unsigned long blk_end = start + min(end - start, 4096UL);

                l2c_set_debug(base, 0x03);
                while (start < blk_end) {
                        writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
                        writel_relaxed(start, base + L2X0_INV_LINE_PA);
                        start += CACHE_LINE_SIZE;
                }
                l2c_set_debug(base, 0x00);

                if (blk_end < end) {
                        raw_spin_unlock_irqrestore(lock, flags);
                        raw_spin_lock_irqsave(lock, flags);
                }
        }
        raw_spin_unlock_irqrestore(lock, flags);
        __l2c210_cache_sync(base);
}

static void l2c310_flush_all_erratum(void)
{
        void __iomem *base = l2x0_base;
        unsigned long flags;

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        l2c_set_debug(base, 0x03);
        __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
        l2c_set_debug(base, 0x00);
        __l2c210_cache_sync(base);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}

static void __init l2c310_save(void __iomem *base)
{
        unsigned revision;

        l2c_save(base);

        l2x0_saved_regs.tag_latency = readl_relaxed(base +
                L310_TAG_LATENCY_CTRL);
        l2x0_saved_regs.data_latency = readl_relaxed(base +
                L310_DATA_LATENCY_CTRL);
        l2x0_saved_regs.filter_end = readl_relaxed(base +
                L310_ADDR_FILTER_END);
        l2x0_saved_regs.filter_start = readl_relaxed(base +
                L310_ADDR_FILTER_START);

        revision = readl_relaxed(base + L2X0_CACHE_ID) &
                        L2X0_CACHE_ID_RTL_MASK;

        /* From r2p0, there is a Prefetch offset/control register */
        if (revision >= L310_CACHE_ID_RTL_R2P0)
                l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
                                                        L310_PREFETCH_CTRL);

        /* From r3p0, there is a Power control register */
        if (revision >= L310_CACHE_ID_RTL_R3P0)
                l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
                                                        L310_POWER_CTRL);
}

static void l2c310_resume(void)
{
        void __iomem *base = l2x0_base;

        if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
                unsigned revision;

                /* restore pl310 setup */
                writel_relaxed(l2x0_saved_regs.tag_latency,
                               base + L310_TAG_LATENCY_CTRL);
                writel_relaxed(l2x0_saved_regs.data_latency,
                               base + L310_DATA_LATENCY_CTRL);
                writel_relaxed(l2x0_saved_regs.filter_end,
                               base + L310_ADDR_FILTER_END);
                writel_relaxed(l2x0_saved_regs.filter_start,
                               base + L310_ADDR_FILTER_START);

                revision = readl_relaxed(base + L2X0_CACHE_ID) &
                                L2X0_CACHE_ID_RTL_MASK;

                if (revision >= L310_CACHE_ID_RTL_R2P0)
                        l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
                                      L310_PREFETCH_CTRL);
                if (revision >= L310_CACHE_ID_RTL_R3P0)
                        l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
                                      L310_POWER_CTRL);

                l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);

                /* Re-enable full-line-of-zeros for Cortex-A9 */
                if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
                        set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
        }
}

static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
{
        switch (act & ~CPU_TASKS_FROZEN) {
        case CPU_STARTING:
                set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
                break;
        case CPU_DYING:
                set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
                break;
        }
        return NOTIFY_OK;
}

static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
        unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
        bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;

        if (rev >= L310_CACHE_ID_RTL_R2P0) {
                if (cortex_a9) {
                        aux |= L310_AUX_CTRL_EARLY_BRESP;
                        pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
                } else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
                        pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
                        aux &= ~L310_AUX_CTRL_EARLY_BRESP;
                }
        }

        if (cortex_a9) {
                u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
                u32 acr = get_auxcr();

                pr_debug("Cortex-A9 ACR=0x%08x\n", acr);

                if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO))
                        pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n");

                if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3)))
                        pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n");

                if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) {
                        aux |= L310_AUX_CTRL_FULL_LINE_ZERO;
                        pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n");
                }
        } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) {
                pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n");
                aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
        }

        if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
                u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);

                pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n",
                        aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "",
                        aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "",
                        1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK));
        }

        /* r3p0 or later has a power control register */
        if (rev >= L310_CACHE_ID_RTL_R3P0) {
                u32 power_ctrl;

                l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
                              base, L310_POWER_CTRL);
                power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
                pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
                        power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
                        power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
        }

        /*
         * Always enable non-secure access to the lockdown registers -
         * we write to them as part of the L2C enable sequence so they
         * need to be accessible.
         */
        aux |= L310_AUX_CTRL_NS_LOCKDOWN;

        l2c_enable(base, aux, num_lock);

        if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
                set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
                cpu_notifier(l2c310_cpu_enable_flz, 0);
        }
}

static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
        struct outer_cache_fns *fns)
{
        unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
        const char *errata[8];
        unsigned n = 0;

        if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
            revision < L310_CACHE_ID_RTL_R2P0 &&
            /* For bcm compatibility */
            fns->inv_range == l2c210_inv_range) {
                fns->inv_range = l2c310_inv_range_erratum;
                fns->flush_range = l2c310_flush_range_erratum;
                errata[n++] = "588369";
        }

        if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
            revision >= L310_CACHE_ID_RTL_R2P0 &&
            revision < L310_CACHE_ID_RTL_R3P1) {
                fns->flush_all = l2c310_flush_all_erratum;
                errata[n++] = "727915";
        }

        if (revision >= L310_CACHE_ID_RTL_R3P0 &&
            revision < L310_CACHE_ID_RTL_R3P2) {
                u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
                /* I don't think bit23 is required here... but iMX6 does so */
                if (val & (BIT(30) | BIT(23))) {
                        val &= ~(BIT(30) | BIT(23));
                        l2c_write_sec(val, base, L310_PREFETCH_CTRL);
                        errata[n++] = "752271";
                }
        }

        if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
            revision == L310_CACHE_ID_RTL_R3P0) {
                sync_reg_offset = L2X0_DUMMY_REG;
                errata[n++] = "753970";
        }

        if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
                errata[n++] = "769419";

        if (n) {
                unsigned i;

                pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
                for (i = 0; i < n; i++)
                        pr_cont(" %s", errata[i]);
                pr_cont(" enabled\n");
        }
}

static void l2c310_disable(void)
{
        /*
         * If full-line-of-zeros is enabled, we must first disable it in the
         * Cortex-A9 auxiliary control register before disabling the L2 cache.
         */
        if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
                set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));

        l2c_disable();
}

static const struct l2c_init_data l2c310_init_fns __initconst = {
        .type = "L2C-310",
        .way_size_0 = SZ_8K,
        .num_lock = 8,
        .enable = l2c310_enable,
        .fixup = l2c310_fixup,
        .save = l2c310_save,
        .outer_cache = {
                .inv_range = l2c210_inv_range,
                .clean_range = l2c210_clean_range,
                .flush_range = l2c210_flush_range,
                .flush_all = l2c210_flush_all,
                .disable = l2c310_disable,
                .sync = l2c210_sync,
                .resume = l2c310_resume,
        },
};

static void __init __l2c_init(const struct l2c_init_data *data,
        u32 aux_val, u32 aux_mask, u32 cache_id)
{
        struct outer_cache_fns fns;
        unsigned way_size_bits, ways;
        u32 aux, old_aux;

        /*
         * Sanity check the aux values.  aux_mask is the bits we preserve
         * from reading the hardware register, and aux_val is the bits we
         * set.
         */
        if (aux_val & aux_mask)
                pr_alert("L2C: platform provided aux values permit register corruption.\n");

        old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
        aux &= aux_mask;
        aux |= aux_val;

        if (old_aux != aux)
                pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
                        old_aux, aux);

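        /*
         * For example, a platform wanting to force bit 22 (shared
         * attribute override enable on the L2C-310) would pass
         * aux_val = BIT(22) and aux_mask = ~BIT(22), giving
         * aux = (hw_value & ~BIT(22)) | BIT(22).  Passing a bit set in
         * both values triggers the corruption warning above.
         */
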
        /* Determine the number of ways */
        switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
        case L2X0_CACHE_ID_PART_L310:
                if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16))
                        pr_warn("L2C: DT/platform tries to modify or specify cache size\n");
                if (aux & (1 << 16))
                        ways = 16;
                else
                        ways = 8;
                break;

        case L2X0_CACHE_ID_PART_L210:
        case L2X0_CACHE_ID_PART_L220:
                ways = (aux >> 13) & 0xf;
                break;

        case AURORA_CACHE_ID:
                ways = (aux >> 13) & 0xf;
                ways = 2 << ((ways + 1) >> 2);
                break;

        default:
                /* Assume unknown chips have 8 ways */
                ways = 8;
                break;
        }

        l2x0_way_mask = (1 << ways) - 1;

        /*
         * way_size_0 is the size that a way_size value of zero would be
         * given the calculation: way_size = way_size_0 << way_size_bits.
         * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
         * then way_size_0 would be 8k.
         *
         * L2 cache size = number of ways * way size.
         */
        way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
                        L2C_AUX_CTRL_WAY_SIZE_SHIFT;
        l2x0_size = ways * (data->way_size_0 << way_size_bits);

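        /*
         * Worked example: an L2C-310 has way_size_0 = SZ_8K, so a
         * way_size field of 3 gives 8K << 3 = 64K per way; with 16 ways
         * that makes l2x0_size = 16 * 64K = 1MB.
         */
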
        fns = data->outer_cache;
        fns.write_sec = outer_cache.write_sec;
        if (data->fixup)
                data->fixup(l2x0_base, cache_id, &fns);

        /*
         * Check if l2x0 controller is already enabled.  If we are booting
         * in non-secure mode, accessing the below registers will fault.
         */
        if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
                data->enable(l2x0_base, aux, data->num_lock);

        outer_cache = fns;

        /*
         * It is strange to save the register state before initialisation,
         * but hey, this is what the DT implementations decided to do.
         */
        if (data->save)
                data->save(l2x0_base);

        /* Re-read it in case some bits are reserved. */
        aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);

        pr_info("%s cache controller enabled, %d ways, %d kB\n",
                data->type, ways, l2x0_size >> 10);
        pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
                data->type, cache_id, aux);
}

void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
{
        const struct l2c_init_data *data;
        u32 cache_id;

        l2x0_base = base;

        cache_id = readl_relaxed(base + L2X0_CACHE_ID);

        switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
        default:
        case L2X0_CACHE_ID_PART_L210:
                data = &l2c210_data;
                break;

        case L2X0_CACHE_ID_PART_L220:
                data = &l2c220_data;
                break;

        case L2X0_CACHE_ID_PART_L310:
                data = &l2c310_init_fns;
                break;
        }

        __l2c_init(data, aux_val, aux_mask, cache_id);
}

#ifdef CONFIG_OF
static int l2_wt_override;

/*
 * Aurora doesn't have the cache ID register available, so we have to
 * pass it through the device tree.
 */
static u32 cache_id_part_number_from_dt;

static void __init l2x0_of_parse(const struct device_node *np,
                                 u32 *aux_val, u32 *aux_mask)
{
        u32 data[2] = { 0, 0 };
        u32 tag = 0;
        u32 dirty = 0;
        u32 val = 0, mask = 0;

        of_property_read_u32(np, "arm,tag-latency", &tag);
        if (tag) {
                mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
                val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
        }

        of_property_read_u32_array(np, "arm,data-latency",
                                   data, ARRAY_SIZE(data));
        if (data[0] && data[1]) {
                mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
                        L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
                val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
                       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
        }

        of_property_read_u32(np, "arm,dirty-latency", &dirty);
        if (dirty) {
                mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
                val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
        }

        *aux_val &= ~mask;
        *aux_val |= val;
        *aux_mask &= ~mask;
}

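/*
 * Sketch of the DT properties consumed above, with made-up address and
 * latency values (latencies are in cycles; the code programs value - 1
 * into AUX_CTRL):
 *
 *	L2: cache-controller@fff12000 {
 *		compatible = "arm,l220-cache";
 *		reg = <0xfff12000 0x1000>;
 *		cache-unified;
 *		arm,tag-latency = <2>;
 *		arm,data-latency = <2 2>;
 *		arm,dirty-latency = <1>;
 *	};
 */
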
static const struct l2c_init_data of_l2c210_data __initconst = {
        .type = "L2C-210",
        .way_size_0 = SZ_8K,
        .num_lock = 1,
        .of_parse = l2x0_of_parse,
        .enable = l2c_enable,
        .save = l2c_save,
        .outer_cache = {
                .inv_range   = l2c210_inv_range,
                .clean_range = l2c210_clean_range,
                .flush_range = l2c210_flush_range,
                .flush_all   = l2c210_flush_all,
                .disable     = l2c_disable,
                .sync        = l2c210_sync,
                .resume      = l2c210_resume,
        },
};

static const struct l2c_init_data of_l2c220_data __initconst = {
        .type = "L2C-220",
        .way_size_0 = SZ_8K,
        .num_lock = 1,
        .of_parse = l2x0_of_parse,
        .enable = l2c220_enable,
        .save = l2c_save,
        .outer_cache = {
                .inv_range   = l2c220_inv_range,
                .clean_range = l2c220_clean_range,
                .flush_range = l2c220_flush_range,
                .flush_all   = l2c220_flush_all,
                .disable     = l2c_disable,
                .sync        = l2c220_sync,
                .resume      = l2c210_resume,
        },
};

static void __init l2c310_of_parse(const struct device_node *np,
        u32 *aux_val, u32 *aux_mask)
{
        u32 data[3] = { 0, 0, 0 };
        u32 tag[3] = { 0, 0, 0 };
        u32 filter[2] = { 0, 0 };

        of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
        if (tag[0] && tag[1] && tag[2])
                writel_relaxed(
                        L310_LATENCY_CTRL_RD(tag[0] - 1) |
                        L310_LATENCY_CTRL_WR(tag[1] - 1) |
                        L310_LATENCY_CTRL_SETUP(tag[2] - 1),
                        l2x0_base + L310_TAG_LATENCY_CTRL);

        of_property_read_u32_array(np, "arm,data-latency",
                                   data, ARRAY_SIZE(data));
        if (data[0] && data[1] && data[2])
                writel_relaxed(
                        L310_LATENCY_CTRL_RD(data[0] - 1) |
                        L310_LATENCY_CTRL_WR(data[1] - 1) |
                        L310_LATENCY_CTRL_SETUP(data[2] - 1),
                        l2x0_base + L310_DATA_LATENCY_CTRL);

        of_property_read_u32_array(np, "arm,filter-ranges",
                                   filter, ARRAY_SIZE(filter));
        if (filter[1]) {
                writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
                               l2x0_base + L310_ADDR_FILTER_END);
                writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
                               l2x0_base + L310_ADDR_FILTER_START);
        }
}

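/*
 * Sketch of a PL310 node exercising the properties parsed above; the
 * address and values are illustrative only.  Latencies are
 * <read write setup> cycles and the filter range is <base size>,
 * rounded to 1MB granularity by the code above:
 *
 *	L2: cache-controller@fffef000 {
 *		compatible = "arm,pl310-cache";
 *		reg = <0xfffef000 0x1000>;
 *		cache-unified;
 *		cache-level = <2>;
 *		arm,tag-latency = <1 1 1>;
 *		arm,data-latency = <2 1 1>;
 *		arm,filter-ranges = <0x80000000 0x40000000>;
 *	};
 */
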
static const struct l2c_init_data of_l2c310_data __initconst = {
        .type = "L2C-310",
        .way_size_0 = SZ_8K,
        .num_lock = 8,
        .of_parse = l2c310_of_parse,
        .enable = l2c310_enable,
        .fixup = l2c310_fixup,
        .save  = l2c310_save,
        .outer_cache = {
                .inv_range   = l2c210_inv_range,
                .clean_range = l2c210_clean_range,
                .flush_range = l2c210_flush_range,
                .flush_all   = l2c210_flush_all,
                .disable     = l2c310_disable,
                .sync        = l2c210_sync,
                .resume      = l2c310_resume,
        },
};

/*
 * This is a variant of the of_l2c310_data with .sync set to
 * NULL. Outer sync operations are not needed when the system is I/O
 * coherent, and potentially harmful in certain situations (PCIe/PL310
 * deadlock on Armada 375/38x due to hardware I/O coherency). The
 * other operations are kept because they are infrequent (therefore do
 * not cause the deadlock in practice) and needed for secondary CPU
 * boot and other power management activities.
 */
static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
        .type = "L2C-310 Coherent",
        .way_size_0 = SZ_8K,
        .num_lock = 8,
        .of_parse = l2c310_of_parse,
        .enable = l2c310_enable,
        .fixup = l2c310_fixup,
        .save  = l2c310_save,
        .outer_cache = {
                .inv_range   = l2c210_inv_range,
                .clean_range = l2c210_clean_range,
                .flush_range = l2c210_flush_range,
                .flush_all   = l2c210_flush_all,
                .disable     = l2c310_disable,
                .resume      = l2c310_resume,
        },
};

/*
 * Note that the end addresses passed to Linux primitives are
 * noninclusive, while the hardware cache range operations use
 * inclusive start and end addresses.
 */
static unsigned long calc_range_end(unsigned long start, unsigned long end)
{
        /*
         * Limit the number of cache lines processed at once,
         * since cache range operations stall the CPU pipeline
         * until completion.
         */
        if (end > start + MAX_RANGE_SIZE)
                end = start + MAX_RANGE_SIZE;

        /*
         * Cache range operations can't straddle a page boundary.
         */
        if (end > PAGE_ALIGN(start+1))
                end = PAGE_ALIGN(start+1);

        return end;
}

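/*
 * For example, with 4K pages and an illustrative start of 0xffe0,
 * PAGE_ALIGN(0xffe0 + 1) = 0x10000, so calc_range_end() clamps the
 * operation at the page boundary and the caller's loop issues the
 * remainder as a separate operation.
 */
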
/*
 * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
 * and range operations only do a TLB lookup on the start address.
 */
static void aurora_pa_range(unsigned long start, unsigned long end,
                        unsigned long offset)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&l2x0_lock, flags);
        writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
        writel_relaxed(end, l2x0_base + offset);
        raw_spin_unlock_irqrestore(&l2x0_lock, flags);

        cache_sync();
}

static void aurora_inv_range(unsigned long start, unsigned long end)
{
        /*
         * Align the start and end addresses to the cache line size:
         * start is rounded down, end is rounded up.
         */
        start &= ~(CACHE_LINE_SIZE - 1);
        end = ALIGN(end, CACHE_LINE_SIZE);

        /*
         * Invalidate all full cache lines between 'start' and 'end'.
         */
        while (start < end) {
                unsigned long range_end = calc_range_end(start, end);
                aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
                                AURORA_INVAL_RANGE_REG);
                start = range_end;
        }
}

static void aurora_clean_range(unsigned long start, unsigned long end)
{
        /*
         * If L2 is forced to WT, the L2 will always be clean and we
         * don't need to do anything here.
         */
        if (!l2_wt_override) {
                start &= ~(CACHE_LINE_SIZE - 1);
                end = ALIGN(end, CACHE_LINE_SIZE);
                while (start != end) {
                        unsigned long range_end = calc_range_end(start, end);
                        aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
                                        AURORA_CLEAN_RANGE_REG);
                        start = range_end;
                }
        }
}

static void aurora_flush_range(unsigned long start, unsigned long end)
{
        start &= ~(CACHE_LINE_SIZE - 1);
        end = ALIGN(end, CACHE_LINE_SIZE);
        while (start != end) {
                unsigned long range_end = calc_range_end(start, end);
                /*
                 * If L2 is forced to WT, the L2 will always be clean and we
                 * just need to invalidate.
                 */
                if (l2_wt_override)
                        aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
                                                        AURORA_INVAL_RANGE_REG);
                else
                        aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
                                                        AURORA_FLUSH_RANGE_REG);
                start = range_end;
        }
}

static void aurora_save(void __iomem *base)
{
        l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
        l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
}

static void aurora_resume(void)
{
        void __iomem *base = l2x0_base;

        if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
                writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
                writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
        }
}

/*
 * For an Aurora cache in no-outer mode, enable broadcasting of cache
 * commands to the L2 via the CP15 coprocessor.
 */
static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
        unsigned num_lock)
{
        u32 u;

        asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
        u |= AURORA_CTRL_FW;            /* Set the FW bit */
        asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));

        isb();

        l2c_enable(base, aux, num_lock);
}

static void __init aurora_fixup(void __iomem *base, u32 cache_id,
        struct outer_cache_fns *fns)
{
        sync_reg_offset = AURORA_SYNC_REG;
}

static void __init aurora_of_parse(const struct device_node *np,
                                u32 *aux_val, u32 *aux_mask)
{
        u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
        u32 mask = AURORA_ACR_REPLACEMENT_MASK;

        of_property_read_u32(np, "cache-id-part",
                        &cache_id_part_number_from_dt);

        /* Determine and save the write policy */
        l2_wt_override = of_property_read_bool(np, "wt-override");

        if (l2_wt_override) {
                val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
                mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
        }

        *aux_val &= ~mask;
        *aux_val |= val;
        *aux_mask &= ~mask;
}

static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
        .type = "Aurora",
        .way_size_0 = SZ_4K,
        .num_lock = 4,
        .of_parse = aurora_of_parse,
        .enable = l2c_enable,
        .fixup = aurora_fixup,
        .save  = aurora_save,
        .outer_cache = {
                .inv_range   = aurora_inv_range,
                .clean_range = aurora_clean_range,
                .flush_range = aurora_flush_range,
                .flush_all   = l2x0_flush_all,
                .disable     = l2x0_disable,
                .sync        = l2x0_cache_sync,
                .resume      = aurora_resume,
        },
};

static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
        .type = "Aurora",
        .way_size_0 = SZ_4K,
        .num_lock = 4,
        .of_parse = aurora_of_parse,
        .enable = aurora_enable_no_outer,
        .fixup = aurora_fixup,
        .save  = aurora_save,
        .outer_cache = {
                .resume      = aurora_resume,
        },
};

/*
 * For certain Broadcom SoCs, depending on the address range, different
 * offsets need to be added to the address before passing it to L2 for
 * invalidation/clean/flush
 *
 * Section Address Range              Offset        EMI
 *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
 *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
 *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
 *
 * When the start and end addresses fall in two different sections, we
 * need to break the L2 operation into two, each within its own section.
 * For example, to invalidate a range that starts at 0xBFFF0000 and ends
 * at 0xC0001000, we need to invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and
 * 2) 0xC0000000 - 0xC0001000
 *
 * Note 1:
 * By breaking a single L2 operation into two, we may potentially suffer
 * some performance hit, but keep in mind the cross-section case is very
 * rare
 *
 * Note 2:
 * We do not need to handle the case when the start address is in
 * Section 1 and the end address is in Section 3, since it is not a
 * valid use case
 *
 * Note 3:
 * In practical terms, section 1 can no longer be used on rev A2, so the
 * code does not need to handle it at all.
 */
#define BCM_SYS_EMI_START_ADDR        0x40000000UL
#define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL

#define BCM_SYS_EMI_OFFSET            0x40000000UL
#define BCM_VC_EMI_OFFSET             0x80000000UL

static inline int bcm_addr_is_sys_emi(unsigned long addr)
{
        return (addr >= BCM_SYS_EMI_START_ADDR) &&
                (addr < BCM_VC_EMI_SEC3_START_ADDR);
}

static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
{
        if (bcm_addr_is_sys_emi(addr))
                return addr + BCM_SYS_EMI_OFFSET;
        else
                return addr + BCM_VC_EMI_OFFSET;
}

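/*
 * Example mappings (32-bit arithmetic, so the section 3 addition
 * wraps): 0x48000000 lies in the SYS EMI section and maps to
 * 0x88000000, while 0xC0001000 lies in section 3 and maps to
 * 0xC0001000 + 0x80000000 = 0x40001000.
 */
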
static void bcm_inv_range(unsigned long start, unsigned long end)
{
        unsigned long new_start, new_end;

        BUG_ON(start < BCM_SYS_EMI_START_ADDR);

        if (unlikely(end <= start))
                return;

        new_start = bcm_l2_phys_addr(start);
        new_end = bcm_l2_phys_addr(end);

        /* normal case, no cross section between start and end */
        if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
                l2c210_inv_range(new_start, new_end);
                return;
        }

        /* They cross sections, so it can only be a cross from section
         * 2 to section 3
         */
        l2c210_inv_range(new_start,
                bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
        l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
                new_end);
}

static void bcm_clean_range(unsigned long start, unsigned long end)
{
        unsigned long new_start, new_end;

        BUG_ON(start < BCM_SYS_EMI_START_ADDR);

        if (unlikely(end <= start))
                return;

        new_start = bcm_l2_phys_addr(start);
        new_end = bcm_l2_phys_addr(end);

        /* normal case, no cross section between start and end */
        if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
                l2c210_clean_range(new_start, new_end);
                return;
        }

        /* They cross sections, so it can only be a cross from section
         * 2 to section 3
         */
        l2c210_clean_range(new_start,
                bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
        l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
                new_end);
}

static void bcm_flush_range(unsigned long start, unsigned long end)
{
        unsigned long new_start, new_end;

        BUG_ON(start < BCM_SYS_EMI_START_ADDR);

        if (unlikely(end <= start))
                return;

        if ((end - start) >= l2x0_size) {
                outer_cache.flush_all();
                return;
        }

        new_start = bcm_l2_phys_addr(start);
        new_end = bcm_l2_phys_addr(end);

        /* normal case, no cross section between start and end */
        if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
                l2c210_flush_range(new_start, new_end);
                return;
        }

        /* They cross sections, so it can only be a cross from section
         * 2 to section 3
         */
        l2c210_flush_range(new_start,
                bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
        l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
                new_end);
}

/* Broadcom L2C-310s are ARM r3p2 or later, and require no fixups */
static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
        .type = "BCM-L2C-310",
        .way_size_0 = SZ_8K,
        .num_lock = 8,
        .of_parse = l2c310_of_parse,
        .enable = l2c310_enable,
        .save  = l2c310_save,
        .outer_cache = {
                .inv_range   = bcm_inv_range,
                .clean_range = bcm_clean_range,
                .flush_range = bcm_flush_range,
                .flush_all   = l2c210_flush_all,
                .disable     = l2c310_disable,
                .sync        = l2c210_sync,
                .resume      = l2c310_resume,
        },
};

static void __init tauros3_save(void __iomem *base)
{
        l2c_save(base);

        l2x0_saved_regs.aux2_ctrl =
                readl_relaxed(base + TAUROS3_AUX2_CTRL);
        l2x0_saved_regs.prefetch_ctrl =
                readl_relaxed(base + L310_PREFETCH_CTRL);
}

static void tauros3_resume(void)
{
        void __iomem *base = l2x0_base;

        if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
                writel_relaxed(l2x0_saved_regs.aux2_ctrl,
                               base + TAUROS3_AUX2_CTRL);
                writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
                               base + L310_PREFETCH_CTRL);

                l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
        }
}

static const struct l2c_init_data of_tauros3_data __initconst = {
        .type = "Tauros3",
        .way_size_0 = SZ_8K,
        .num_lock = 8,
        .enable = l2c_enable,
        .save  = tauros3_save,
        /* Tauros3 broadcasts L1 cache operations to L2 */
        .outer_cache = {
                .resume      = tauros3_resume,
        },
};

#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
static const struct of_device_id l2x0_ids[] __initconst = {
        L2C_ID("arm,l210-cache", of_l2c210_data),
        L2C_ID("arm,l220-cache", of_l2c220_data),
        L2C_ID("arm,pl310-cache", of_l2c310_data),
        L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
        L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
        L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
        L2C_ID("marvell,tauros3-cache", of_tauros3_data),
        /* Deprecated IDs */
        L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
        {}
};

int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
{
        const struct l2c_init_data *data;
        struct device_node *np;
        struct resource res;
        u32 cache_id, old_aux;

        np = of_find_matching_node(NULL, l2x0_ids);
        if (!np)
                return -ENODEV;

        if (of_address_to_resource(np, 0, &res))
                return -ENODEV;

        l2x0_base = ioremap(res.start, resource_size(&res));
        if (!l2x0_base)
                return -ENOMEM;

        l2x0_saved_regs.phy_base = res.start;

        data = of_match_node(l2x0_ids, np)->data;

        if (of_device_is_compatible(np, "arm,pl310-cache") &&
            of_property_read_bool(np, "arm,io-coherent"))
                data = &of_l2c310_coherent_data;

        old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
        if (old_aux != ((old_aux & aux_mask) | aux_val)) {
                pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n",
                        old_aux, (old_aux & aux_mask) | aux_val);
        } else if (aux_mask != ~0U && aux_val != 0) {
                pr_alert("L2C: platform provided aux values match the hardware, so have no effect.  Please remove them.\n");
        }

        /* All L2 caches are unified, so this property should be specified */
        if (!of_property_read_bool(np, "cache-unified"))
                pr_err("L2C: device tree omits to specify unified cache\n");

        /* L2 configuration can only be changed if the cache is disabled */
        if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
                if (data->of_parse)
                        data->of_parse(np, &aux_val, &aux_mask);

        if (cache_id_part_number_from_dt)
                cache_id = cache_id_part_number_from_dt;
        else
                cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);

        __l2c_init(data, aux_val, aux_mask, cache_id);

        return 0;
}
#endif