linux/arch/arc/mm/cache_arc700.c
/*
 * ARC700 VIPT Cache Management
 *
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
 *   -flush_cache_dup_mm (fork)
 *   -likewise for flush_cache_mm (exit/execve)
 *   -likewise for flush_cache_range, flush_cache_page (munmap, exit, COW-break)
 *
 * vineetg: Apr 2011
 *  -Now that MMU can support larger pg sz (16K), the determination of
 *   aliasing should not be based on the assumption of an 8k pg
 *
 * vineetg: Mar 2011
 *  -optimised version of flush_icache_range( ) for making I/D coherent
 *   when vaddr is available (agnostic of num of aliases)
 *
 * vineetg: Mar 2011
 *  -Added documentation about I-cache aliasing on ARC700 and the way it
 *   was handled up until MMU V2.
 *  -Spotted a three-year-old bug when killing the 4 aliases, which needs
 *   the bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
 *                        instead of paddr | {0x00, 0x01, 0x10, 0x11}
 *   (Rajesh you owe me one now)
 *
 * vineetg: Dec 2010
 *  -Off-by-one error when computing num_of_lines to flush
 *   This broke signal handling with bionic, which uses a synthetic sigret stub
 *
 * vineetg: Mar 2010
 *  -GCC can't generate ZOL for core cache flush loops.
 *   Converted them to iteration-count based loops, as opposed to
 *   while (start < end) types
 *
 * Vineetg: July 2009
 *  -In the I-cache flush routine we used to check for aliasing on every
 *   line INV. Instead we now set up routines per cache geometry and invoke
 *   them via function pointers.
 *
 * Vineetg: Jan 2009
 *  -Cache line flush routines used to flush an extra line beyond end addr
 *   because the check was while (end >= start) instead of (end > start)
 *     =Some call sites had to work around it by doing -1, -4 etc to end param
 *     =Some callers didn't care. This was especially bad in case of the INV
 *      routines, which would discard valid data (cause of the horrible ext2
 *      bug in the ARC IDE driver)
 *
 * vineetg: June 11th 2008: Fixed flush_icache_range( )
 *  -Since ARC700 caches are not coherent (I$ doesn't snoop D$) both need
 *   to be flushed, which it was not doing.
 *  -load_module( ) passes a vmalloc addr (Kernel Virtual Addr) to the API,
 *   however ARC cache maintenance OPs require a PHY addr. Thus the need to
 *   do vmalloc_to_phy.
 *  -Also added an optimisation there: for a range > PAGE SIZE we flush the
 *   entire cache in one shot rather than line by line. E.g. for a module
 *   with code sz 600k, the old code flushed 600k worth of cache
 *   (line-by-line), while the cache is only 16 or 32k.
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/cache.h>
#include <linux/mmu_context.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/cachectl.h>
#include <asm/setup.h>


#ifdef CONFIG_ARC_HAS_ICACHE
static void __ic_line_inv_no_alias(unsigned long, int);
static void __ic_line_inv_2_alias(unsigned long, int);
static void __ic_line_inv_4_alias(unsigned long, int);

/* Holds the ptr to the flush routine, depending on size, due to aliasing issues */
static void (*___flush_icache_rtn) (unsigned long, int);
#endif

char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
{
	int n = 0;
	unsigned int c = smp_processor_id();

#define PR_CACHE(p, enb, str)						\
{									\
	if (!(p)->ver)							\
		n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");	\
	else								\
		n += scnprintf(buf + n, len - n,			\
			str"\t\t: (%uK) VIPT, %dway set-asc, %ub Line %s\n", \
			TO_KB((p)->sz), (p)->assoc, (p)->line_len,	\
			enb ?  "" : "DISABLED (kernel-build)");		\
}

	PR_CACHE(&cpuinfo_arc700[c].icache, __CONFIG_ARC_HAS_ICACHE, "I-Cache");
	PR_CACHE(&cpuinfo_arc700[c].dcache, __CONFIG_ARC_HAS_DCACHE, "D-Cache");

	return buf;
}

/*
 * Read the Cache Build Configuration Registers, decode them and save into
 * the cpuinfo structure for later use.
 * No validation done here, simply read/convert the BCRs
 */
void __init read_decode_cache_bcr(void)
{
	struct bcr_cache ibcr, dbcr;
	struct cpuinfo_arc_cache *p_ic, *p_dc;
	unsigned int cpu = smp_processor_id();

	p_ic = &cpuinfo_arc700[cpu].icache;
	READ_BCR(ARC_REG_IC_BCR, ibcr);

	if (ibcr.config == 0x3)
		p_ic->assoc = 2;
	p_ic->line_len = 8 << ibcr.line_len;
	p_ic->sz = 0x200 << ibcr.sz;
	p_ic->ver = ibcr.ver;

	p_dc = &cpuinfo_arc700[cpu].dcache;
	READ_BCR(ARC_REG_DC_BCR, dbcr);

	if (dbcr.config == 0x2)
		p_dc->assoc = 4;
	p_dc->line_len = 16 << dbcr.line_len;
	p_dc->sz = 0x200 << dbcr.sz;
	p_dc->ver = dbcr.ver;
}

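/*
 * Worked example of the decode above (illustrative values, not from any
 * particular silicon): a 16K, 2-way I$ with 32-byte lines would report
 *	ibcr.config   = 0x3  ->  assoc = 2
 *	ibcr.line_len = 2    ->  line_len = 8 << 2 = 32 bytes
 *	ibcr.sz       = 5    ->  sz = 0x200 << 5 = 0x4000 = 16K
 */
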
/*
 * 1. Validate the Cache Geometry (compile time config matches hardware)
 * 2. If I-cache suffers from aliasing, setup work-arounds (different flush rtn)
 *    (aliasing D-cache configurations are not supported YET)
 * 3. Enable the Caches, setup default flush mode for D-Cache
 * 4. Calculate the SHMLBA used by user space
 */
void __init arc_cache_init(void)
{
	unsigned int temp;
	unsigned int cpu = smp_processor_id();
	struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
	struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
	/* self-init silences a bogus "may be used uninitialized" warning */
	int way_pg_ratio = way_pg_ratio;
	char str[256];

	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));

	if (!ic->ver)
		goto chk_dc;

#ifdef CONFIG_ARC_HAS_ICACHE
	/* 1. Confirm some of I-cache params which Linux assumes */
	if ((ic->assoc != ARC_ICACHE_WAYS) ||
	    (ic->line_len != ARC_ICACHE_LINE_LEN)) {
		panic("Cache H/W doesn't match kernel Config");
	}
#if (CONFIG_ARC_MMU_VER > 2)
	if (ic->ver != 3) {
		if (running_on_hw)
			panic("Cache ver doesn't match MMU ver\n");

		/* For ISS - suggest the toggles to use */
		pr_err("Use -prop=icache_version=3,-prop=dcache_version=3\n");

	}
#endif

	/*
	 * If the cache way size is <= page size then no aliasing is exhibited,
	 * otherwise the ratio determines the number of aliases.
	 * e.g. 32K I$, 2 way set assoc, 8k pg size
	 *	 way-sz = 32k/2 = 16k
	 *	 way-pg-ratio = 16k/8k = 2, so 2 aliases possible
	 *	 (meaning 1 line could be in 2 possible locations).
	 */
	way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
	switch (way_pg_ratio) {
	case 0:
	case 1:
		___flush_icache_rtn = __ic_line_inv_no_alias;
		break;
	case 2:
		___flush_icache_rtn = __ic_line_inv_2_alias;
		break;
	case 4:
		___flush_icache_rtn = __ic_line_inv_4_alias;
		break;
	default:
		panic("Unsupported I-Cache Sz\n");
	}
#endif

	/* Enable/disable I-Cache */
	temp = read_aux_reg(ARC_REG_IC_CTRL);

#ifdef CONFIG_ARC_HAS_ICACHE
	temp &= ~IC_CTRL_CACHE_DISABLE;
#else
	temp |= IC_CTRL_CACHE_DISABLE;
#endif

	write_aux_reg(ARC_REG_IC_CTRL, temp);

chk_dc:
	if (!dc->ver)
		return;

#ifdef CONFIG_ARC_HAS_DCACHE
	if ((dc->assoc != ARC_DCACHE_WAYS) ||
	    (dc->line_len != ARC_DCACHE_LINE_LEN)) {
		panic("Cache H/W doesn't match kernel Config");
	}

	/* check for D-Cache aliasing */
	if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE)
		panic("D$ aliasing not handled right now\n");
#endif

	/* Set the default Invalidate Mode to "simply discard dirty lines"
	 * as this is more frequent than flush-before-invalidate.
	 * Of course we toggle this default behaviour when desired.
	 */
	temp = read_aux_reg(ARC_REG_DC_CTRL);
	temp &= ~DC_CTRL_INV_MODE_FLUSH;

#ifdef CONFIG_ARC_HAS_DCACHE
	/* Enable D-Cache: Clear Bit 0 */
	write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE);
#else
	/* Flush D cache */
	write_aux_reg(ARC_REG_DC_FLSH, 0x1);
	/* Disable D cache */
	write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE);
#endif

	return;
}
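
/*
 * Worked example for the way_pg_ratio switch above (illustrative):
 * a 64K, 2-way I$ with 8K pages gives
 *	way_sz = 64K / 2 = 32K;  way_pg_ratio = 32K / 8K = 4
 * so a given line may live at 4 possible set-indexes and
 * __ic_line_inv_4_alias() gets installed as the flush routine.
 */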

#define OP_INV		0x1
#define OP_FLUSH	0x2
#define OP_FLUSH_N_INV	0x3

#ifdef CONFIG_ARC_HAS_DCACHE

/***************************************************************
 * Machine specific helpers for Entire D-Cache or Per Line ops
 */

static inline void wait_for_flush(void)
{
	while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
		;
}

/*
 * Operation on Entire D-Cache
 * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
 * Note that constant propagation ensures all the checks are gone
 * in generated code
 */
static inline void __dc_entire_op(const int cacheop)
{
	unsigned long flags, tmp = tmp;	/* self-init avoids a bogus warning */
	int aux;

	local_irq_save(flags);

	if (cacheop == OP_FLUSH_N_INV) {
		/* Dcache provides 2 cmds: FLUSH or INV
		 * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
		 * flush-n-inv is achieved by INV cmd but with IM=1
		 * Default INV sub-mode is DISCARD, which needs to be toggled
		 */
		tmp = read_aux_reg(ARC_REG_DC_CTRL);
		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
	}

	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
		aux = ARC_REG_DC_IVDC;
	else
		aux = ARC_REG_DC_FLSH;

	write_aux_reg(aux, 0x1);

	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
		wait_for_flush();

	/* Switch back to the DISCARD-ONLY Invalidate mode */
	if (cacheop == OP_FLUSH_N_INV)
		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);

	local_irq_restore(flags);
}

/*
 * Per Line Operation on D-Cache
 * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
 * Its sole purpose is to help gcc generate ZOL
 */
static inline void __dc_line_loop(unsigned long start, unsigned long sz,
					  int aux_reg)
{
	int num_lines, slack;

	/* Ensure we properly floor/ceil the non-line-aligned/sized requests
	 * and have @start aligned to cache line and an integral @num_lines.
	 * This can however be avoided for page-sized requests since:
	 *  -@start will be cache-line aligned already (being page aligned)
	 *  -@sz will be an integral multiple of line size (being page sized).
	 */
	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
		slack = start & ~DCACHE_LINE_MASK;
		sz += slack;
		start -= slack;
	}

	num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);

	while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER > 2)
		/*
		 * Just as for I$, in MMU v3, D$ ops also require
		 * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
		 * But we pass phy addr for both. This works since Linux
		 * doesn't support aliasing configs for D$, yet.
		 * Thus paddr is enough to provide both tag and index.
		 */
		write_aux_reg(ARC_REG_DC_PTAG, start);
#endif
		write_aux_reg(aux_reg, start);
		start += ARC_DCACHE_LINE_LEN;
	}
}
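
/*
 * Worked example of the floor/ceil fixup above (illustrative; assumes
 * 32-byte lines, i.e. ~DCACHE_LINE_MASK == 0x1f):
 *	start = 0x80001004, sz = 260
 *	slack = 0x80001004 & 0x1f = 4  ->  start = 0x80001000, sz = 264
 *	num_lines = DIV_ROUND_UP(264, 32) = 9
 * so the partially covered first and last lines are both operated upon.
 */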

/*
 * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
 */
static inline void __dc_line_op(unsigned long start, unsigned long sz,
					const int cacheop)
{
	unsigned long flags, tmp = tmp;	/* self-init avoids a bogus warning */
	int aux;

	local_irq_save(flags);

	if (cacheop == OP_FLUSH_N_INV) {
		/*
		 * Dcache provides 2 cmds: FLUSH or INV
		 * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
		 * flush-n-inv is achieved by INV cmd but with IM=1
		 * Default INV sub-mode is DISCARD, which needs to be toggled
		 */
		tmp = read_aux_reg(ARC_REG_DC_CTRL);
		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
	}

	if (cacheop & OP_INV)	/* Inv / flush-n-inv use same cmd reg */
		aux = ARC_REG_DC_IVDL;
	else
		aux = ARC_REG_DC_FLDL;

	__dc_line_loop(start, sz, aux);

	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
		wait_for_flush();

	/* Switch back to the DISCARD-ONLY Invalidate mode */
	if (cacheop == OP_FLUSH_N_INV)
		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);

	local_irq_restore(flags);
}

#else

#define __dc_entire_op(cacheop)
#define __dc_line_op(start, sz, cacheop)

#endif /* CONFIG_ARC_HAS_DCACHE */


#ifdef CONFIG_ARC_HAS_ICACHE

/*
 *		I-Cache Aliasing in ARC700 VIPT caches
 *
 * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
 * to "index" into the SET of cache-lines and paddr from the MMU to match the
 * TAG in the WAYS of the SET.
 *
 * However the CDU interface (to flush/inv lines from software) only takes
 * paddr (to have a simpler hardware interface). For simpler cases, using
 * paddr alone suffices.
 * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
 *	way_sz = cache_sz / num_ways = 16k/2 = 8k
 *	num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
 *   Ignoring the bottom 5 bits corresponding to the offset within a 32b
 *   cacheline, the bits required to calc the set-index are bits 12:5
 *   (0 based). Since this range fits inside the bottom 13 bits of paddr,
 *   which are the same for vaddr and paddr (with 8k pg sz), paddr alone can
 *   be safely used by the CDU to unambiguously locate a cache-line.
 *
 * However for a differently sized cache, say 32k I$, the above math yields
 * the need for 14 bits of vaddr to locate a cache line, which can't be
 * provided by paddr, since bit 13 (0 based) might differ between the two.
 *
 * This lack of the extra bits needed for correct line addressing defines the
 * classical problem of cache aliasing with VIPT architectures:
 * num_aliases = 1 << extra_bits
 * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
 *	2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
 *	2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
 *
 * ------------------
 * MMU v1/v2 (Fixed Page Size 8k)
 * ------------------
 * The solution was to provide the CDU with these additional vaddr bits. These
 * would be bits [x:13], where x depends on the cache geometry.
 * H/w folks chose [17:13] to be a future-safe range, and more so these 5 bits
 * of vaddr could easily be "stuffed" into the paddr as bits [4:0] since the
 * orig 5 bits of paddr were anyway ignored by the CDU line ops, as they
 * represent the offset within a cache-line. The advantage of this "clumsy"
 * interface for the additional info was that no new reg was needed in the
 * CDU.
 *
 * 17:13 represented the max num of bits passable; the actual bits needed
 * were fewer, based on the num of aliases possible:
 * -for 2 alias possibility, only bit 13 needed (32K cache)
 * -for 4 alias possibility, bits 14:13 needed (64K cache)
 *
 * Since vaddr was not available for all instances of I$ flush req by the
 * core kernel, the only safe way (non-optimal though) was to kill all
 * possible lines which could represent an alias (even if they didn't
 * represent one in execution).
 * e.g. for 64K I$, 4 aliases possible, so we did
 *	flush start
 *	flush start | 0x01
 *	flush start | 0x02
 *	flush start | 0x03
 *
 * The penalty was invoking the operation itself: since tag match is anyway
 * paddr based, a line which didn't represent an alias would not match the
 * paddr, hence wouldn't be killed.
 *
 * Note that aliasing concerns are independent of line-sz for a given cache
 * geometry (size + set_assoc) because the extra bits required by line-sz are
 * reduced from the set calc.
 * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using the math above:
 *  32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
 *  64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
 *
 * ------------------
 * MMU v3
 * ------------------
 * This version of the MMU supports variable page sizes (1k-16k) - Linux will
 * support 8k (default), 16k and 4k.
 * However from the hardware perspective, smaller page sizes aggravate
 * aliasing, meaning more vaddr bits are needed to disambiguate the
 * cache-line op; the existing scheme of piggybacking won't work for certain
 * configurations. Two new registers IC_PTAG and DC_PTAG were introduced.
 * "tag" bits are provided in PTAG, index bits in the existing IVIL/IVDL/FLDL
 * regs
 */
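
/*
 * Worked example of the alias math above (illustrative): 32K I$, 2-way,
 * 32B lines, 8K pages:
 *	way_sz   = 32K / 2  = 16K
 *	num_sets = 16K / 32 = 512  =>  set-index = addr bits [13:5]
 * With 8K pages only bits [12:0] are identical in vaddr and paddr, so
 * bit 13 is 1 "extra" bit:  num_aliases = 1 << 1 = 2.
 */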

/***********************************************************
 * Machine specific helpers for per line I-Cache invalidate.
 * 3 routines to account for 1, 2, 4 aliases possible
 */

static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
{
	while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER > 2)
		write_aux_reg(ARC_REG_IC_PTAG, start);
#endif
		write_aux_reg(ARC_REG_IC_IVIL, start);
		start += ARC_ICACHE_LINE_LEN;
	}
}

static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
{
	while (num_lines-- > 0) {

#if (CONFIG_ARC_MMU_VER > 2)
		/*
		 * MMU v3: the CDU prog model (for line ops) now uses the new
		 * IC_PTAG reg to pass the "tag" bits, and the existing IVIL
		 * reg only looks at the bits relevant for "index"
		 * (details above).
		 * Programming Notes:
		 * -when writing the tag to the PTAG reg, bit chopping can be
		 *  avoided; the CDU ignores non-tag bits.
		 * -Ideally "index" must be computed from vaddr, but it is not
		 *  available in these routines. So to be safe, we kill the
		 *  lines at all possible indexes corresponding to the num of
		 *  aliases possible for the given cache config.
		 */
		write_aux_reg(ARC_REG_IC_PTAG, start);
		write_aux_reg(ARC_REG_IC_IVIL,
				  start & ~(0x1 << PAGE_SHIFT));
		write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
#else
		write_aux_reg(ARC_REG_IC_IVIL, start);
		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
#endif
		start += ARC_ICACHE_LINE_LEN;
	}
}

static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
{
	while (num_lines-- > 0) {

#if (CONFIG_ARC_MMU_VER > 2)
		unsigned long base = start & ~(0x3 << PAGE_SHIFT);

		write_aux_reg(ARC_REG_IC_PTAG, start);

		/*
		 * Kill all 4 possible alias indexes: the 2 bits just above
		 * PAGE_SHIFT must cycle through 00, 01, 10, 11 (the earlier
		 * mask-only sequence missed combinations depending on the
		 * incoming paddr bits).
		 */
		write_aux_reg(ARC_REG_IC_IVIL, base);
		write_aux_reg(ARC_REG_IC_IVIL, base | (0x1 << PAGE_SHIFT));
		write_aux_reg(ARC_REG_IC_IVIL, base | (0x2 << PAGE_SHIFT));
		write_aux_reg(ARC_REG_IC_IVIL, base | (0x3 << PAGE_SHIFT));
#else
		write_aux_reg(ARC_REG_IC_IVIL, start);
		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
		write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
		write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
#endif
		start += ARC_ICACHE_LINE_LEN;
	}
}

static void __ic_line_inv(unsigned long start, unsigned long sz)
{
	unsigned long flags;
	int num_lines, slack;

	/*
	 * Ensure we properly floor/ceil the non-line-aligned/sized requests
	 * and have @start aligned to cache line, and an integral @num_lines.
	 * However page-sized flushes can be compile-time optimised:
	 *  -@start will be cache-line aligned already (being page aligned)
	 *  -@sz will be an integral multiple of line size (being page sized).
	 */
	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
		slack = start & ~ICACHE_LINE_MASK;
		sz += slack;
		start -= slack;
	}

	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);

	local_irq_save(flags);
	(*___flush_icache_rtn) (start, num_lines);
	local_irq_restore(flags);
}

/*
 * Unlike the routines above, having vaddr for the flush op (along with
 * paddr) prevents the need to speculatively kill the lines in multiple
 * sets based on the ratio of way_sz : pg_sz
 */
static void __ic_line_inv_vaddr(unsigned long phy_start,
					 unsigned long vaddr, unsigned long sz)
{
	unsigned long flags;
	int num_lines, slack;
	unsigned int addr;

	slack = phy_start & ~ICACHE_LINE_MASK;
	sz += slack;
	phy_start -= slack;
	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);

#if (CONFIG_ARC_MMU_VER > 2)
	vaddr &= ~ICACHE_LINE_MASK;
	addr = phy_start;
#else
	/* bits 17:13 of vaddr go as bits 4:0 of paddr */
	addr = phy_start | ((vaddr >> 13) & 0x1F);
#endif

	local_irq_save(flags);
	while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER > 2)
		/* tag comes from phy addr */
		write_aux_reg(ARC_REG_IC_PTAG, addr);

		/* index bits come from vaddr */
		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
		vaddr += ARC_ICACHE_LINE_LEN;
#else
		/* this paddr contains the vaddr bits as needed */
		write_aux_reg(ARC_REG_IC_IVIL, addr);
#endif
		addr += ARC_ICACHE_LINE_LEN;
	}
	local_irq_restore(flags);
}
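
/*
 * Worked example of the MMU v1/v2 vaddr "stuffing" above (illustrative,
 * 8K pages): for vaddr = 0x6000, bits 17:13 are 0x3, so
 *	addr = phy_start | ((0x6000 >> 13) & 0x1F) = phy_start | 0x3
 * The CDU reads those low 5 bits (otherwise ignored as line-offset) as
 * the extra index info needed to pick the right alias.
 */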

#else

#define __ic_line_inv(start, sz)
#define __ic_line_inv_vaddr(pstart, vstart, sz)

#endif /* CONFIG_ARC_HAS_ICACHE */


/***********************************************************
 * Exported APIs
 */

/* TBD: use pg_arch_1 to optimize this */
void flush_dcache_page(struct page *page)
{
	__dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
}
EXPORT_SYMBOL(flush_dcache_page);


void dma_cache_wback_inv(unsigned long start, unsigned long sz)
{
	__dc_line_op(start, sz, OP_FLUSH_N_INV);
}
EXPORT_SYMBOL(dma_cache_wback_inv);

void dma_cache_inv(unsigned long start, unsigned long sz)
{
	__dc_line_op(start, sz, OP_INV);
}
EXPORT_SYMBOL(dma_cache_inv);

void dma_cache_wback(unsigned long start, unsigned long sz)
{
	__dc_line_op(start, sz, OP_FLUSH);
}
EXPORT_SYMBOL(dma_cache_wback);

/*
 * This is the API for making I/D Caches consistent when modifying code
 * (loadable modules, kprobes, etc.)
 * It is called on insmod, with the kernel virtual address of the module's
 * CODE. ARC cache maintenance ops require a PHY address, thus we need to
 * convert the vmalloc addr to a PHY addr.
 */
void flush_icache_range(unsigned long kstart, unsigned long kend)
{
	unsigned int tot_sz, off, sz;
	unsigned long phy, pfn;
	unsigned long flags;

	/* printk("Kernel Cache Coherency: %lx to %lx\n", kstart, kend); */

	/* This is not the right API for user virtual address */
	if (kstart < TASK_SIZE) {
		BUG();	/* flush_icache_range() called for user vaddr space */
		return;
	}

	/* Shortcut for bigger flush ranges.
	 * Here we don't care if this was kernel virtual or phy addr
	 */
	tot_sz = kend - kstart;
	if (tot_sz > PAGE_SIZE) {
		flush_cache_all();
		return;
	}

	/* Case: Kernel Phy addr (0x8000_0000 onwards) */
	if (likely(kstart > PAGE_OFFSET)) {
		__ic_line_inv(kstart, kend - kstart);
		__dc_line_op(kstart, kend - kstart, OP_FLUSH);
		return;
	}

	/*
	 * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
	 * (1) ARC Cache Maintenance ops only take Phy addr, hence special
	 *     handling of kernel vaddr.
	 *
	 * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
	 *     it still needs to handle the 2-page scenario, where the range
	 *     straddles 2 virtual pages, hence the need for a loop
	 */
	while (tot_sz > 0) {
		off = kstart % PAGE_SIZE;
		pfn = vmalloc_to_pfn((void *)kstart);
		phy = (pfn << PAGE_SHIFT) + off;
		sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
		local_irq_save(flags);
		__dc_line_op(phy, sz, OP_FLUSH);
		__ic_line_inv(phy, sz);
		local_irq_restore(flags);
		kstart += sz;
		tot_sz -= sz;
	}
}
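
/*
 * Worked example of the 2-page straddle above (illustrative, 8K pages):
 *	kstart = 0x70001f00, kend = 0x70002100  ->  tot_sz = 0x200
 *	iter 1: off = 0x1f00, sz = min(0x200, 0x2000 - 0x1f00) = 0x100
 *	iter 2: off = 0,      sz = 0x100  (remainder, on the next page)
 * Each iteration does its own vmalloc_to_pfn() lookup since adjacent
 * virtual pages may map to discontiguous physical pages.
 */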

/*
 * Optimised version of flush_icache_range() for specific callers
 * (ptrace/signals) where vaddr is also available. This allows passing both
 * vaddr and paddr bits to the CDU for the cache flush, short-circuiting the
 * pessimistic algo above which kills all possible aliases.
 * An added advantage of knowing that vaddr is a user-vaddr: it avoids the
 * various checks and handling for k-vaddr, k-paddr as done in the original
 * version above.
 */
void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
			      int len)
{
	__ic_line_inv_vaddr(paddr, u_vaddr, len);
	__dc_line_op(paddr, len, OP_FLUSH);
}

/*
 * XXX: This also needs to be optimised using pg_arch_1
 * This is called when a page-cache page is about to be mapped into a
 * user process' address space.  It offers an opportunity for a
 * port to ensure d-cache/i-cache coherency if necessary.
 */
void flush_icache_page(struct vm_area_struct *vma, struct page *page)
{
	if (!(vma->vm_flags & VM_EXEC))
		return;

	__ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
}

void flush_icache_all(void)
{
	unsigned long flags;

	local_irq_save(flags);

	write_aux_reg(ARC_REG_IC_IVIC, 1);

	/* lr will not complete till the icache inv operation is over */
	read_aux_reg(ARC_REG_IC_CTRL);
	local_irq_restore(flags);
}

noinline void flush_cache_all(void)
{
	unsigned long flags;

	local_irq_save(flags);

	flush_icache_all();
	__dc_entire_op(OP_FLUSH_N_INV);

	local_irq_restore(flags);
}

/**********************************************************************
 * Explicit Cache flush request from user space via syscall
 * Needed for JITs which generate code on the fly
 */
SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
{
	/* TBD: optimize this */
	flush_cache_all();
	return 0;
}
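
/*
 * User-space usage sketch (illustrative only; assumes the syscall is wired
 * up as __NR_cacheflush in the exported uapi headers):
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	// After a JIT writes instructions into 'buf', make them visible
 *	// to the I-cache before jumping to them:
 *	syscall(__NR_cacheflush, (unsigned long)buf, buf_len, 0);
 */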