linux/arch/mips/include/asm/barrier.h
<<
>>
Prefs
   1/*
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
   7 */
   8#ifndef __ASM_BARRIER_H
   9#define __ASM_BARRIER_H
  10
  11#include <asm/addrspace.h>
  12
/*
 * Sync types defined by the MIPS architecture (document MD00087 table 6.5).
 * These values are used with the sync instruction to perform memory barriers.
 * Types of ordering guarantees available through the SYNC instruction:
 * - Completion Barriers
 * - Ordering Barriers
 * As compared to the completion barrier, the ordering barrier is a
 * lighter-weight operation as it does not require the specified instructions
 * before the SYNC to be already completed. Instead it only requires that those
 * specified instructions which are subsequent to the SYNC in the instruction
 * stream are never re-ordered for processing ahead of the specified
 * instructions which are before the SYNC in the instruction stream.
 * This potentially reduces how many cycles the barrier instruction must stall
 * before it completes.
 * Implementations that do not use any of the non-zero values of stype to define
 * different barriers, such as ordering barriers, must make those stype values
 * act the same as stype zero.
 */
  31
/*
 * Completion barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must be
 *   already globally performed before any synchronizable specified memory
 *   instructions that occur after the SYNC are allowed to be performed, with
 *   respect to any other processor or coherent I/O module.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 *
 * - A stype value of zero will always be defined such that it performs the most
 *   complete set of synchronization operations that are defined. This means
 *   stype zero always does a completion barrier that affects both loads and
 *   stores preceding the SYNC instruction and both loads and stores that are
 *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
 *   by the architecture or specific implementations to perform synchronization
 *   behaviors that are less complete than that of stype zero. If an
 *   implementation does not use one of these non-zero values to define a
 *   different synchronization behavior, then that non-zero value of stype must
 *   act the same as stype zero completion barrier. This allows software written
 *   for an implementation with a lighter-weight barrier to work on another
 *   implementation which only implements the stype zero completion barrier.
 *
 * - A completion barrier is required, potentially in conjunction with SSNOP (in
 *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
 *   to guarantee that memory reference results are visible across operating
 *   mode changes. For example, a completion barrier is required on some
 *   implementations on entry to and exit from Debug Mode to guarantee that
 *   memory effects are handled correctly.
 */
  63
  64/*
  65 * stype 0 - A completion barrier that affects preceding loads and stores and
  66 * subsequent loads and stores.
  67 * Older instructions which must reach the load/store ordering point before the
  68 * SYNC instruction completes: Loads, Stores
  69 * Younger instructions which must reach the load/store ordering point only
  70 * after the SYNC instruction completes: Loads, Stores
  71 * Older instructions which must be globally performed when the SYNC instruction
  72 * completes: Loads, Stores
  73 */
  74#define STYPE_SYNC 0x0
  75
/*
 * Ordering barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must
 *   reach a stage in the load/store datapath after which no instruction
 *   re-ordering is possible before any synchronizable specified memory
 *   instruction which occurs after the SYNC instruction in the instruction
 *   stream reaches the same stage in the load/store datapath.
 *
 * - If any memory instruction before the SYNC instruction in program order
 *   generates a memory request to external memory and any memory
 *   instruction after the SYNC instruction in program order also generates a
 *   memory request to external memory, the memory request belonging to the
 *   older instruction must be globally performed before the time the memory
 *   request belonging to the younger instruction is globally performed.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 */
  95
  96/*
  97 * stype 0x10 - An ordering barrier that affects preceding loads and stores and
  98 * subsequent loads and stores.
  99 * Older instructions which must reach the load/store ordering point before the
 100 * SYNC instruction completes: Loads, Stores
 101 * Younger instructions which must reach the load/store ordering point only
 102 * after the SYNC instruction completes: Loads, Stores
 103 * Older instructions which must be globally performed when the SYNC instruction
 104 * completes: N/A
 105 */
 106#define STYPE_SYNC_MB 0x10
 107
 108/*
 109 * stype 0x14 - A completion barrier specific to global invalidations
 110 *
 111 * When a sync instruction of this type completes any preceding GINVI or GINVT
 112 * operation has been globalized & completed on all coherent CPUs. Anything
 113 * that the GINV* instruction should invalidate will have been invalidated on
 114 * all coherent CPUs when this instruction completes. It is implementation
 115 * specific whether the GINV* instructions themselves will ensure completion,
 116 * or this sync type will.
 117 *
 118 * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
 119 * this sync type also requires that previous SYNCI operations have completed.
 120 */
 121#define STYPE_GINV      0x14
 122
 123#ifdef CONFIG_CPU_HAS_SYNC
 124#define __sync()                                \
 125        __asm__ __volatile__(                   \
 126                ".set   push\n\t"               \
 127                ".set   noreorder\n\t"          \
 128                ".set   mips2\n\t"              \
 129                "sync\n\t"                      \
 130                ".set   pop"                    \
 131                : /* no output */               \
 132                : /* no input */                \
 133                : "memory")
 134#else
 135#define __sync()        do { } while(0)
 136#endif
 137
 138#define __fast_iob()                            \
 139        __asm__ __volatile__(                   \
 140                ".set   push\n\t"               \
 141                ".set   noreorder\n\t"          \
 142                "lw     $0,%0\n\t"              \
 143                "nop\n\t"                       \
 144                ".set   pop"                    \
 145                : /* no output */               \
 146                : "m" (*(int *)CKSEG1)          \
 147                : "memory")
 148#ifdef CONFIG_CPU_CAVIUM_OCTEON
 149# define OCTEON_SYNCW_STR       ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
 150# define __syncw()      __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")
 151
 152# define fast_wmb()     __syncw()
 153# define fast_rmb()     barrier()
 154# define fast_mb()      __sync()
 155# define fast_iob()     do { } while (0)
 156#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
 157# define fast_wmb()     __sync()
 158# define fast_rmb()     __sync()
 159# define fast_mb()      __sync()
 160# ifdef CONFIG_SGI_IP28
 161#  define fast_iob()                            \
 162        __asm__ __volatile__(                   \
 163                ".set   push\n\t"               \
 164                ".set   noreorder\n\t"          \
 165                "lw     $0,%0\n\t"              \
 166                "sync\n\t"                      \
 167                "lw     $0,%0\n\t"              \
 168                ".set   pop"                    \
 169                : /* no output */               \
 170                : "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
 171                : "memory")
 172# else
 173#  define fast_iob()                            \
 174        do {                                    \
 175                __sync();                       \
 176                __fast_iob();                   \
 177        } while (0)
 178# endif
 179#endif /* CONFIG_CPU_CAVIUM_OCTEON */
 180
 181#ifdef CONFIG_CPU_HAS_WB
 182
 183#include <asm/wbflush.h>
 184
 185#define mb()            wbflush()
 186#define iob()           wbflush()
 187
 188#else /* !CONFIG_CPU_HAS_WB */
 189
 190#define mb()            fast_mb()
 191#define iob()           fast_iob()
 192
 193#endif /* !CONFIG_CPU_HAS_WB */
 194
 195#define wmb()           fast_wmb()
 196#define rmb()           fast_rmb()
 197
 198#if defined(CONFIG_WEAK_ORDERING)
 199# ifdef CONFIG_CPU_CAVIUM_OCTEON
 200#  define __smp_mb()    __sync()
 201#  define __smp_rmb()   barrier()
 202#  define __smp_wmb()   __syncw()
 203# else
 204#  define __smp_mb()    __asm__ __volatile__("sync" : : :"memory")
 205#  define __smp_rmb()   __asm__ __volatile__("sync" : : :"memory")
 206#  define __smp_wmb()   __asm__ __volatile__("sync" : : :"memory")
 207# endif
 208#else
 209#define __smp_mb()      barrier()
 210#define __smp_rmb()     barrier()
 211#define __smp_wmb()     barrier()
 212#endif
 213
 214#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
 215#define __WEAK_LLSC_MB          "       sync    \n"
 216#else
 217#define __WEAK_LLSC_MB          "               \n"
 218#endif
 219
 220#define smp_llsc_mb()   __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
 221
 222#ifdef CONFIG_CPU_CAVIUM_OCTEON
 223#define smp_mb__before_llsc() smp_wmb()
 224#define __smp_mb__before_llsc() __smp_wmb()
 225/* Cause previous writes to become visible on all CPUs as soon as possible */
 226#define nudge_writes() __asm__ __volatile__(".set push\n\t"             \
 227                                            ".set arch=octeon\n\t"      \
 228                                            "syncw\n\t"                 \
 229                                            ".set pop" : : : "memory")
 230#else
 231#define smp_mb__before_llsc() smp_llsc_mb()
 232#define __smp_mb__before_llsc() smp_llsc_mb()
 233#define nudge_writes() mb()
 234#endif
 235
 236#define __smp_mb__before_atomic()       __smp_mb__before_llsc()
 237#define __smp_mb__after_atomic()        smp_llsc_mb()
 238
 239/*
 240 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 241 * store or pref) in between an ll & sc can cause the sc instruction to
 242 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 243 * containing such sequences, this bug bites harder than we might otherwise
 244 * expect due to reordering & speculation:
 245 *
 246 * 1) A memory access appearing prior to the ll in program order may actually
 247 *    be executed after the ll - this is the reordering case.
 248 *
 249 *    In order to avoid this we need to place a memory barrier (ie. a sync
 250 *    instruction) prior to every ll instruction, in between it & any earlier
 251 *    memory access instructions. Many of these cases are already covered by
 252 *    smp_mb__before_llsc() but for the remaining cases, typically ones in
 253 *    which multiple CPUs may operate on a memory location but ordering is not
 254 *    usually guaranteed, we use loongson_llsc_mb() below.
 255 *
 256 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 257 *
 258 * 2) If a conditional branch exists between an ll & sc with a target outside
 259 *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
 260 *    or similar, then misprediction of the branch may allow speculative
 261 *    execution of memory accesses from outside of the ll-sc loop.
 262 *
 263 *    In order to avoid this we need a memory barrier (ie. a sync instruction)
 264 *    at each affected branch target, for which we also use loongson_llsc_mb()
 265 *    defined below.
 266 *
 267 *    This case affects all current Loongson 3 CPUs.
 268 */
 269#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
 270#define loongson_llsc_mb()      __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
 271#else
 272#define loongson_llsc_mb()      do { } while (0)
 273#endif
 274
 275static inline void sync_ginv(void)
 276{
 277        asm volatile("sync\t%0" :: "i"(STYPE_GINV));
 278}
 279
 280#include <asm-generic/barrier.h>
 281
 282#endif /* __ASM_BARRIER_H */
 283