LXR linux/arch/arm/vfp/vfp.h

   1/*
   2 *  linux/arch/arm/vfp/vfp.h
   3 *
   4 *  Copyright (C) 2004 ARM Limited.
   5 *  Written by Deep Blue Solutions Limited.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11
  12static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
  13{
  14        if (shift) {
  15                if (shift < 32)
  16                        val = val >> shift | ((val << (32 - shift)) != 0);
  17                else
  18                        val = val != 0;
  19        }
  20        return val;
  21}
  22
  23static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
  24{
  25        if (shift) {
  26                if (shift < 64)
  27                        val = val >> shift | ((val << (64 - shift)) != 0);
  28                else
  29                        val = val != 0;
  30        }
  31        return val;
  32}
  33
/*
 * Narrow a 64-bit value to its upper 32 bits, setting bit 0 of the
 * result if the lower 32 bits were non-zero.
 */
static inline u32 vfp_hi64to32jamming(u64 val)
{
        u32 v;

        /*
         * %Q1 and %R1 name the low and high words of the 64-bit operand.
         * "cmp low, #1" leaves the carry flag clear only when low == 0,
         * so the result is either high (movcc) or high | 1 (orrcs).
         */
        asm(
        "cmp    %Q1, #1         @ vfp_hi64to32jamming\n\t"
        "movcc  %0, %R1\n\t"
        "orrcs  %0, %R1, #1"
        : "=r" (v) : "r" (val) : "cc");

        return v;
}
  46
/*
 * 128-bit addition: (*resh:*resl) = (nh:nl) + (mh:ml).
 *
 * Each 128-bit value is passed as a high and a low 64-bit half.  The
 * sum is built 32 bits at a time, lowest word first, with the carry
 * flag chaining the four additions together.  Any carry out of the top
 * word is discarded.
 */
static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
{
        /* Operands 0/1 (nl/nh) are both inputs and outputs: updated in place. */
        asm(    "adds   %Q0, %Q2, %Q4\n\t"
                "adcs   %R0, %R2, %R4\n\t"
                "adcs   %Q1, %Q3, %Q5\n\t"
                "adc    %R1, %R3, %R5"
            : "=r" (nl), "=r" (nh)
            : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
            : "cc");
        *resh = nh;
        *resl = nl;
}
  59
/*
 * 128-bit subtraction: (*resh:*resl) = (nh:nl) - (mh:ml).
 *
 * Each 128-bit value is passed as a high and a low 64-bit half.  The
 * difference is built 32 bits at a time, lowest word first, with the
 * carry flag chaining the borrow through the four subtractions.  Any
 * borrow out of the top word is discarded.
 */
static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
{
        /* Operands 0/1 (nl/nh) are both inputs and outputs: updated in place. */
        asm(    "subs   %Q0, %Q2, %Q4\n\t"
                "sbcs   %R0, %R2, %R4\n\t"
                "sbcs   %Q1, %Q3, %Q5\n\t"
                "sbc    %R1, %R3, %R5\n\t"
            : "=r" (nl), "=r" (nh)
            : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
            : "cc");
        *resh = nh;
        *resl = nl;
}
  72
  73static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
  74{
  75        u32 nh, nl, mh, ml;
  76        u64 rh, rma, rmb, rl;
  77
  78        nl = n;
  79        ml = m;
  80        rl = (u64)nl * ml;
  81
  82        nh = n >> 32;
  83        rma = (u64)nh * ml;
  84
  85        mh = m >> 32;
  86        rmb = (u64)nl * mh;
  87        rma += rmb;
  88
  89        rh = (u64)nh * mh;
  90        rh += ((u64)(rma < rmb) << 32) + (rma >> 32);
  91
  92        rma <<= 32;
  93        rl += rma;
  94        rh += (rl < rma);
  95
  96        *resl = rl;
  97        *resh = rh;
  98}
  99
 100static inline void shift64left(u64 *resh, u64 *resl, u64 n)
 101{
 102        *resh = n >> 63;
 103        *resl = n << 1;
 104}
 105
 106static inline u64 vfp_hi64multiply64(u64 n, u64 m)
 107{
 108        u64 rh, rl;
 109        mul64to128(&rh, &rl, n, m);
 110        return rh | (rl != 0);
 111}
 112
/*
 * Estimate the 64-bit quotient of the 128-bit value (nh:nl) divided by
 * m.  The quotient is built in two 32-bit halves, each obtained by
 * dividing by the upper 32 bits of m only.
 *
 * Returns ~0ULL when nh >= m.
 *
 * NOTE(review): the accuracy of the estimate presumably depends on m
 * being normalised (top bit set) - confirm against the callers.
 */
static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
{
        u64 mh, ml, remh, reml, termh, terml, z;

        /* Saturate instead of computing a quotient. */
        if (nh >= m)
                return ~0ULL;
        mh = m >> 32;
        /* Upper 32 bits of the quotient: nh / mh, saturated to 0xffffffff. */
        if (mh << 32 <= nh) {
                z = 0xffffffff00000000ULL;
        } else {
                z = nh;
                /* Relies on do_div() leaving the quotient in z. */
                do_div(z, mh);
                z <<= 32;
        }
        /* Remainder = (nh:nl) - m * z. */
        mul64to128(&termh, &terml, m, z);
        sub128(&remh, &reml, nh, nl, termh, terml);
        ml = m << 32;
        /*
         * A negative remainder means the upper half of z was too large:
         * step it down by one and add m << 32 (held in mh:ml) back in.
         */
        while ((s64)remh < 0) {
                z -= 0x100000000ULL;
                add128(&remh, &reml, remh, reml, mh, ml);
        }
        /* Lower 32 bits of the quotient from the middle 64 bits of the remainder. */
        remh = (remh << 32) | (reml >> 32);
        if (mh << 32 <= remh) {
                z |= 0xffffffff;
        } else {
                do_div(remh, mh);
                z |= remh;
        }
        return z;
}
 143
 144/*
 145 * Operations on unpacked elements
 146 */
 147#define vfp_sign_negate(sign)   (sign ^ 0x8000)
 148
/*
 * Single-precision
 *
 * Unpacked form of a single-precision value, as filled in by
 * vfp_single_unpack() and consumed by vfp_single_pack().
 */
struct vfp_single {
        s16     exponent;       /* exponent field of the packed value */
        u16     sign;           /* packed sign bit moved down to bit 15 */
        u32     significand;    /* mantissa, aligned below bit 30 */
};
 157
/*
 * Read/write a single-precision value as a raw 32-bit pattern.
 * Defined elsewhere.
 * NOTE(review): 'reg' is presumably a VFP register index - confirm.
 */
extern s32 vfp_get_float(unsigned int reg);
extern void vfp_put_float(s32 val, unsigned int reg);
 160
/*
 * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
 * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
 * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
 *  which are not propagated to the float upon packing.
 * VFP_SINGLE_LOW_BITS_MASK - mask selecting those low bits
 */
#define VFP_SINGLE_MANTISSA_BITS        (23)
#define VFP_SINGLE_EXPONENT_BITS        (8)
#define VFP_SINGLE_LOW_BITS             (32 - VFP_SINGLE_MANTISSA_BITS - 2)
#define VFP_SINGLE_LOW_BITS_MASK        ((1 << VFP_SINGLE_LOW_BITS) - 1)

/*
 * The bit in an unpacked float which indicates that it is a quiet NaN.
 * This is the top mantissa bit, shifted up by VFP_SINGLE_LOW_BITS to
 * its position in the unpacked significand.
 */
#define VFP_SINGLE_SIGNIFICAND_QNAN     (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
 176
/*
 * Operations on packed single-precision numbers
 *
 * _sign     - isolate the sign bit (bit 31), left in place
 * _negate   - toggle the sign bit
 * _abs      - clear the sign bit
 * _exponent - extract the exponent field, shifted down to bit 0
 * _mantissa - extract the mantissa field
 */
#define vfp_single_packed_sign(v)       ((v) & 0x80000000)
#define vfp_single_packed_negate(v)     ((v) ^ 0x80000000)
#define vfp_single_packed_abs(v)        ((v) & ~0x80000000)
#define vfp_single_packed_exponent(v)   (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
#define vfp_single_packed_mantissa(v)   ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
 185
 186/*
 187 * Unpack a single-precision float.  Note that this returns the magnitude
 188 * of the single-precision float mantissa with the 1. if necessary,
 189 * aligned to bit 30.
 190 */
 191static inline void vfp_single_unpack(struct vfp_single *s, s32 val)
 192{
 193        u32 significand;
 194
 195        s->sign = vfp_single_packed_sign(val) >> 16,
 196        s->exponent = vfp_single_packed_exponent(val);
 197
 198        significand = (u32) val;
 199        significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
 200        if (s->exponent && s->exponent != 255)
 201                significand |= 0x40000000;
 202        s->significand = significand;
 203}
 204
 205/*
 206 * Re-pack a single-precision float.  This assumes that the float is
 207 * already normalised such that the MSB is bit 30, _not_ bit 31.
 208 */
 209static inline s32 vfp_single_pack(struct vfp_single *s)
 210{
 211        u32 val;
 212        val = (s->sign << 16) +
 213              (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
 214              (s->significand >> VFP_SINGLE_LOW_BITS);
 215        return (s32)val;
 216}
 217
/*
 * Classification flags returned by vfp_single_type() and
 * vfp_double_type().  VFP_ZERO and VFP_DENORMAL are returned ORed with
 * VFP_NUMBER; infinities and NaNs are returned without VFP_NUMBER.
 */
#define VFP_NUMBER              (1<<0)
#define VFP_ZERO                (1<<1)
#define VFP_DENORMAL            (1<<2)
#define VFP_INFINITY            (1<<3)
#define VFP_NAN                 (1<<4)
#define VFP_NAN_SIGNAL          (1<<5)

/* A quiet NaN is a plain NaN; a signalling NaN also has VFP_NAN_SIGNAL set. */
#define VFP_QNAN                (VFP_NAN)
#define VFP_SNAN                (VFP_NAN|VFP_NAN_SIGNAL)
 227
 228static inline int vfp_single_type(struct vfp_single *s)
 229{
 230        int type = VFP_NUMBER;
 231        if (s->exponent == 255) {
 232                if (s->significand == 0)
 233                        type = VFP_INFINITY;
 234                else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN)
 235                        type = VFP_QNAN;
 236                else
 237                        type = VFP_SNAN;
 238        } else if (s->exponent == 0) {
 239                if (s->significand == 0)
 240                        type |= VFP_ZERO;
 241                else
 242                        type |= VFP_DENORMAL;
 243        }
 244        return type;
 245}
 246
/*
 * Without DEBUG, vfp_single_normaliseround() is a macro which drops its
 * 'func' argument and calls __vfp_single_normaliseround().  With DEBUG
 * it is a real function which also receives 'func'.
 * NOTE(review): 'func' is presumably the caller's name for debug
 * output - confirm against the definition.
 */
#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions);
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func);
#endif
 253
/*
 * Double-precision
 *
 * Unpacked form of a double-precision value, as filled in by
 * vfp_double_unpack() and consumed by vfp_double_pack().
 */
struct vfp_double {
        s16     exponent;       /* exponent field of the packed value */
        u16     sign;           /* packed sign bit moved down to bit 15 */
        u64     significand;    /* mantissa, aligned below bit 62 */
};
 262
/*
 * VFP_REG_ZERO is a special register number for vfp_get_double
 * which returns (double)0.0.  This is useful for the compare with
 * zero instructions.
 *
 * Its value depends on CONFIG_VFPv3: 32 when set, 16 otherwise.
 */
#ifdef CONFIG_VFPv3
#define VFP_REG_ZERO    32
#else
#define VFP_REG_ZERO    16
#endif
/*
 * Read/write a double-precision value as a raw 64-bit pattern.
 * Defined elsewhere.
 */
extern u64 vfp_get_double(unsigned int reg);
extern void vfp_put_double(u64 val, unsigned int reg);
 275
/*
 * VFP_DOUBLE_MANTISSA_BITS - number of bits in the mantissa
 * VFP_DOUBLE_EXPONENT_BITS - number of bits in the exponent
 * VFP_DOUBLE_LOW_BITS - number of low bits in the unpacked significand
 *  which are shifted out by vfp_double_pack().
 * VFP_DOUBLE_LOW_BITS_MASK - mask selecting those low bits
 */
#define VFP_DOUBLE_MANTISSA_BITS        (52)
#define VFP_DOUBLE_EXPONENT_BITS        (11)
#define VFP_DOUBLE_LOW_BITS             (64 - VFP_DOUBLE_MANTISSA_BITS - 2)
#define VFP_DOUBLE_LOW_BITS_MASK        ((1 << VFP_DOUBLE_LOW_BITS) - 1)

/*
 * The bit in an unpacked double which indicates that it is a quiet NaN
 */
#define VFP_DOUBLE_SIGNIFICAND_QNAN     (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
 285
/*
 * Operations on packed double-precision numbers
 *
 * _sign     - isolate the sign bit (bit 63), left in place
 * _negate   - toggle the sign bit
 * _abs      - clear the sign bit
 * _exponent - extract the exponent field, shifted down to bit 0
 * _mantissa - extract the mantissa field
 */
#define vfp_double_packed_sign(v)       ((v) & (1ULL << 63))
#define vfp_double_packed_negate(v)     ((v) ^ (1ULL << 63))
#define vfp_double_packed_abs(v)        ((v) & ~(1ULL << 63))
#define vfp_double_packed_exponent(v)   (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
#define vfp_double_packed_mantissa(v)   ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
 294
 295/*
 296 * Unpack a double-precision float.  Note that this returns the magnitude
 297 * of the double-precision float mantissa with the 1. if necessary,
 298 * aligned to bit 62.
 299 */
 300static inline void vfp_double_unpack(struct vfp_double *s, s64 val)
 301{
 302        u64 significand;
 303
 304        s->sign = vfp_double_packed_sign(val) >> 48;
 305        s->exponent = vfp_double_packed_exponent(val);
 306
 307        significand = (u64) val;
 308        significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
 309        if (s->exponent && s->exponent != 2047)
 310                significand |= (1ULL << 62);
 311        s->significand = significand;
 312}
 313
 314/*
 315 * Re-pack a double-precision float.  This assumes that the float is
 316 * already normalised such that the MSB is bit 30, _not_ bit 31.
 317 */
 318static inline s64 vfp_double_pack(struct vfp_double *s)
 319{
 320        u64 val;
 321        val = ((u64)s->sign << 48) +
 322              ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
 323              (s->significand >> VFP_DOUBLE_LOW_BITS);
 324        return (s64)val;
 325}
 326
 327static inline int vfp_double_type(struct vfp_double *s)
 328{
 329        int type = VFP_NUMBER;
 330        if (s->exponent == 2047) {
 331                if (s->significand == 0)
 332                        type = VFP_INFINITY;
 333                else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
 334                        type = VFP_QNAN;
 335                else
 336                        type = VFP_SNAN;
 337        } else if (s->exponent == 0) {
 338                if (s->significand == 0)
 339                        type |= VFP_ZERO;
 340                else
 341                        type |= VFP_DENORMAL;
 342        }
 343        return type;
 344}
 345
/*
 * Double-precision counterpart of vfp_single_normaliseround(); defined
 * elsewhere.  Unlike the single-precision variant it always takes 'func'.
 */
u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);

/*
 * Defined elsewhere.
 * NOTE(review): presumably returns an initial estimate of the square
 * root significand for the given exponent/significand - confirm.
 */
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
 349
/*
 * A special flag to tell the normalisation code not to normalise.
 */
#define VFP_NAN_FLAG    0x100

/*
 * A bit pattern used to indicate the initial (unset) value of the
 * exception mask, in case nothing handles an instruction.  This
 * doesn't include the NAN flag, which gets masked out before
 * we check for an error.
 *
 * The value is all 32 bits set except VFP_NAN_FLAG.
 */
#define VFP_EXCEPTION_ERROR     ((u32)-1 & ~VFP_NAN_FLAG)
 362
/*
 * Flags describing the type of a VFP instruction.
 *  OP_SCALAR - this operation always operates in scalar mode
 *  OP_SD - the instruction exceptionally writes to a single precision result.
 *  OP_DD - the instruction exceptionally writes to a double precision result.
 *  OP_SM - the instruction exceptionally reads from a single precision operand.
 *
 * NOTE(review): OP_SD and OP_DD have the same value (1 << 1), so they
 * cannot be told apart within one flags word.  Presumably each is only
 * used where the other never applies - confirm before relying on it.
 */
#define OP_SCALAR       (1 << 0)
#define OP_SD           (1 << 1)
#define OP_DD           (1 << 1)
#define OP_SM           (1 << 2)

/*
 * One instruction entry: the function implementing it plus its OP_*
 * flags.
 * NOTE(review): dd/dn/dm are presumably the destination and the two
 * source register numbers - confirm against the implementations.
 */
struct op {
        u32 (* const fn)(int dd, int dn, int dm, u32 fpscr);
        u32 flags;
};
 379
/*
 * Defined elsewhere.
 * NOTE(review): presumably saves the VFP state to 'location', given the
 * current FPEXC value - confirm against the definition.
 */
extern void vfp_save_state(void *location, u32 fpexc);
 381