linux/arch/sh/include/asm/unaligned-sh4a.h
#ifndef __ASM_SH_UNALIGNED_SH4A_H
#define __ASM_SH_UNALIGNED_SH4A_H

/*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * Support for 64-bit accesses is done through shifting and masking
 * relative to the endianness. Unaligned stores are not supported by the
 * instruction encoding, so these continue to use the packed struct.
 *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0 and we get the return value for free.
 *
 * NOTE: Because the instruction encoding requires r0, care should be
 * taken to avoid mixing these heavily with other r0 consumers, such as
 * the atomic ops. Failure to adhere to this can result in the compiler
 * running out of spill registers and blowing up when building at low
 * optimization levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
 */
#include <linux/unaligned/packed_struct.h>
#include <linux/types.h>
#include <asm/byteorder.h>

static inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
        return p[0] | p[1] << 8;
#else
        return p[0] << 8 | p[1];
#endif
}

static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
{
        unsigned long unaligned;

        __asm__ __volatile__ (
                "movua.l        @%1, %0\n\t"
                 : "=z" (unaligned)
                 : "r" (p)
        );

        return unaligned;
}
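
/*
 * A note on the constraint above: "z" is GCC's SH constraint for r0,
 * the only destination register movua.l can encode. Purely as an
 * illustration (actual code generation may differ), a call site such as
 * sh4a_get_unaligned_cpu32(p) with p in r4 should boil down to
 *
 *      movua.l @r4, r0
 *
 * with the result already sitting in the return register.
 */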

/*
 * Even though movua.l supports auto-increment on the read side, it can
 * only store to r0 due to instruction encoding constraints, so just let
 * the compiler sort it out on its own.
 */
static inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
        return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
                    sh4a_get_unaligned_cpu32(p);
#else
        return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
                    sh4a_get_unaligned_cpu32(p + 4);
#endif
}
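
/*
 * Worked example on a little-endian kernel: for the byte sequence
 * p[0..7] = { 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 }, the low
 * word sh4a_get_unaligned_cpu32(p) is 0x55667788, the high word
 * sh4a_get_unaligned_cpu32(p + 4) is 0x11223344, and the combined
 * result is 0x1122334455667788.
 */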

static inline u16 get_unaligned_le16(const void *p)
{
        return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_le32(const void *p)
{
        return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_le64(const void *p)
{
        return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline u16 get_unaligned_be16(const void *p)
{
        return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_be32(const void *p)
{
        return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_be64(const void *p)
{
        return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
}
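
/*
 * Illustrative usage only (buf and the field layout are hypothetical):
 * a caller pulling big-endian fields out of an arbitrarily aligned
 * buffer might do
 *
 *      u16 type = get_unaligned_be16(buf);
 *      u32 len  = get_unaligned_be32(buf + 2);
 *
 * where the 32-bit load should end up as a single movua.l, plus a byte
 * swap on a little-endian kernel.
 */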

static inline void nonnative_put_le16(u16 val, u8 *p)
{
        *p++ = val;
        *p++ = val >> 8;
}

static inline void nonnative_put_le32(u32 val, u8 *p)
{
        nonnative_put_le16(val, p);
        nonnative_put_le16(val >> 16, p + 2);
}

static inline void nonnative_put_le64(u64 val, u8 *p)
{
        nonnative_put_le32(val, p);
        nonnative_put_le32(val >> 32, p + 4);
}

static inline void nonnative_put_be16(u16 val, u8 *p)
{
        *p++ = val >> 8;
        *p++ = val;
}

static inline void nonnative_put_be32(u32 val, u8 *p)
{
        nonnative_put_be16(val >> 16, p);
        nonnative_put_be16(val, p + 2);
}

static inline void nonnative_put_be64(u64 val, u8 *p)
{
        nonnative_put_be32(val >> 32, p);
        nonnative_put_be32(val, p + 4);
}
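
/*
 * The nonnative_put_* helpers above emit the bytes individually, which
 * covers both the byte swap and the lack of an unaligned store
 * instruction in one go. Worked example: nonnative_put_be32(0x11223344, p)
 * writes p[0] = 0x11, p[1] = 0x22, p[2] = 0x33, p[3] = 0x44.
 */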

static inline void put_unaligned_le16(u16 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu16(val, p);
#else
        nonnative_put_le16(val, p);
#endif
}

static inline void put_unaligned_le32(u32 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu32(val, p);
#else
        nonnative_put_le32(val, p);
#endif
}

static inline void put_unaligned_le64(u64 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu64(val, p);
#else
        nonnative_put_le64(val, p);
#endif
}

static inline void put_unaligned_be16(u16 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu16(val, p);
#else
        nonnative_put_be16(val, p);
#endif
}

static inline void put_unaligned_be32(u32 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu32(val, p);
#else
        nonnative_put_be32(val, p);
#endif
}

static inline void put_unaligned_be64(u64 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu64(val, p);
#else
        nonnative_put_be64(val, p);
#endif
}

/*
 * While it's a bit non-obvious, even though the generic le/be wrappers
 * use the __get/put_xxx prefixing, they actually wrap into the
 * non-prefixed get/put_xxx variants provided above.
 */
#include <linux/unaligned/generic.h>

#ifdef __LITTLE_ENDIAN
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
#else
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
#endif
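
/*
 * Illustrative usage (p and q are hypothetical pointers): the generic
 * wrappers dispatch on the pointee size, so
 *
 *      u32 v = get_unaligned((u32 *)p);
 *      put_unaligned(v, (u32 *)q);
 *
 * reads and then writes a native-endian 32-bit value at potentially
 * unaligned addresses, using movua.l for the load and the packed-struct
 * helper for the store.
 */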

#endif /* __ASM_SH_UNALIGNED_SH4A_H */