linux/arch/ia64/lib/idiv32.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * Copyright (C) 2000 Hewlett-Packard Co
   4 * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
   5 *
   6 * 32-bit integer division.
   7 *
   8 * This code is based on the application note entitled "Divide, Square Root
   9 * and Remainder Algorithms for the IA-64 Architecture".  This document
  10 * is available as Intel document number 248725-002 or via the web at
  11 * http://developer.intel.com/software/opensource/numerics/
  12 *
  13 * For more details on the theory behind these algorithms, see "IA-64
  14 * and Elementary Functions" by Peter Markstein; HP Professional Books
  15 * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  16 */
  17
  18#include <asm/asmmacro.h>
  19#include <asm/export.h>
  20
   /*
    * Build-time configuration.  This one source file yields four routines,
    * selected by whether MODULO and/or UNSIGNED are defined when the file
    * is preprocessed (see NAME below):
    *
    *   (neither)            __divsi3
    *   UNSIGNED             __udivsi3
    *   MODULO               __modsi3
    *   MODULO + UNSIGNED    __umodsi3
    */

   /* OP: operation part of the symbol name ("div" or "mod"). */
   21#ifdef MODULO
   22# define OP     mod
   23#else
   24# define OP     div
   25#endif
   26
   /*
    * SGN:        signedness part of the symbol name ("u" or empty).
    * EXTEND:     instruction that widens the low 4 bytes of an input
    *             register (zxt4 = zero-extend, sxt4 = sign-extend).
    * INT_TO_FP:  convert the integer held in FP register b to a
    *             floating-point value in FP register a.
    * FP_TO_INT:  convert the floating-point value in b to an integer in a,
    *             truncating (the ".trunc" completer).
    *
    * NOTE(review): only the unsigned INT_TO_FP carries a ".s1" completer;
    * presumably fcvt.xf accepts no status-field completer -- confirm
    * against the ISA manual before changing either form.
    */
   27#ifdef UNSIGNED
   28# define SGN    u
   29# define EXTEND zxt4
   30# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
   31# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
   32#else
   33# define SGN
   34# define EXTEND sxt4
   35# define INT_TO_FP(a,b) fcvt.xf a=b
   36# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
   37#endif
   38
   /*
    * PASTE goes through PASTE1 so that its arguments (SGN, OP) are
    * macro-expanded before ## joins them.  NAME therefore expands to
    * "__" + SGN + OP + "si3".
    */
   39#define PASTE1(a,b)     a##b
   40#define PASTE(a,b)      PASTE1(a,b)
   41#define NAME            PASTE(PASTE(__,SGN),PASTE(OP,si3))
  42
   /*
    * NAME(a, b): 32-bit integer division, or remainder when MODULO is
    * defined, carried out in the floating-point unit.
    *
    * Inputs:  in0 = a (dividend), in1 = b (divisor).  Both are first
    *          widened from their low 32 bits by EXTEND.
    * Result:  r8 = trunc(a / b), or a - trunc(a / b) * b when MODULO is
    *          defined.
    * Writes:  r2, r8, in0, in1, f6, f7, f8, f9, p6.
    *
    * Method:  convert a and b to FP, obtain the reciprocal approximation
    * y0 ~ 1/b from frcpa, form q0 = a*y0 and refine it twice (q1, q2)
    * using the error terms e0 and e1, then truncate q2 to an integer.
    *
    * The ";;" lines are stops that separate instruction groups.  The
    * MODULO-only setf.sig instructions are deliberately placed in groups
    * where the FP register they overwrite has already been read for the
    * last time, so their position relative to the stops matters.
    */
   43GLOBAL_ENTRY(NAME)
             // Register-stack frame: 2 inputs, 0 locals, 0 outputs, 0 rotating.
   44        .regstk 2,0,0,0
   45        // Transfer inputs to FP registers.
   46        mov r2 = 0xffdd                 // r2 = -34 + 65535 (fp reg format bias)
   47        EXTEND in0 = in0                // in0 = a
   48        EXTEND in1 = in1                // in1 = b
   49        ;;
   50        setf.sig f8 = in0
   51        setf.sig f9 = in1
   52#ifdef MODULO
             // b has already been handed to setf.sig above, so in1 can be
             // reused to hold -b for the final multiply-add.
   53        sub in1 = r0, in1               // in1 = -b
   54#endif
   55        ;;
   56        // Convert the inputs to FP, to avoid FP software-assist faults.
   57        INT_TO_FP(f8, f8)
   58        INT_TO_FP(f9, f9)
   59        ;;
   60        setf.exp f7 = r2                // f7 = 2^-34
             // frcpa writes the reciprocal approximation to f6 and sets p6.
             // Every refinement step below is predicated on p6; when p6 is
             // clear they are all skipped and f6 reaches FP_TO_INT exactly as
             // frcpa left it.
             // NOTE(review): per the IA-64 ISA, frcpa clears the predicate
             // when f6 already holds the final quotient (e.g. b == 0) --
             // confirm against the manual.
   61        frcpa.s1 f6, p6 = f8, f9        // y0 = frcpa(b)
   62        ;;
   63(p6)    fmpy.s1 f8 = f8, f6             // q0 = a*y0
   64(p6)    fnma.s1 f6 = f9, f6, f1         // e0 = -b*y0 + 1 
   65        ;;
   66#ifdef MODULO
             // The FP value of b in f9 was last read by the fnma above, so
             // f9 is now free to carry the integer -b for xma.l.
   67        setf.sig f9 = in1               // f9 = -b
   68#endif
   69(p6)    fma.s1 f8 = f6, f8, f8          // q1 = e0*q0 + q0
   70(p6)    fma.s1 f6 = f6, f6, f7          // e1 = e0*e0 + 2^-34
   71        ;;
   72#ifdef MODULO
             // 2^-34 in f7 was last read by the e1 step above, so f7 is now
             // free to carry the integer a for xma.l.
   73        setf.sig f7 = in0
   74#endif
   75(p6)    fma.s1 f6 = f6, f8, f8          // q2 = e1*q1 + q1
   76        ;;
   77        FP_TO_INT(f6, f6)               // q = trunc(q2)
   78        ;;
   79#ifdef MODULO
             // Integer multiply-add on the significands: f6 = q, f9 = -b, f7 = a.
   80        xma.l f6 = f6, f9, f7           // r = q*(-b) + a
   81        ;;
   82#endif
   83        getf.sig r8 = f6                // transfer result to result register
   84        br.ret.sptk.many rp
   85END(NAME)
   86EXPORT_SYMBOL(NAME)
  87