busybox/coreutils/factor.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2017 Denys Vlasenko <vda.linux@googlemail.com>
   3 *
   4 * Licensed under GPLv2, see file LICENSE in this source tree.
   5 */
   6//config:config FACTOR
   7//config:       bool "factor (2.7 kb)"
   8//config:       default y
   9//config:       help
  10//config:       factor factorizes integers
  11
  12//applet:IF_FACTOR(APPLET(factor, BB_DIR_USR_BIN, BB_SUID_DROP))
  13
  14//kbuild:lib-$(CONFIG_FACTOR) += factor.o
  15
  16//usage:#define factor_trivial_usage
  17//usage:       "[NUMBER]..."
  18//usage:#define factor_full_usage "\n\n"
  19//usage:       "Print prime factors"
  20
  21#include "libbb.h"
  22#include "common_bufsiz.h"
  23
  24#if 0
  25# define dbg(...) bb_error_msg(__VA_ARGS__)
  26#else
  27# define dbg(...) ((void)0)
  28#endif
  29
  30typedef unsigned long long wide_t;
  31
  32#if ULLONG_MAX == (UINT_MAX * UINT_MAX + 2 * UINT_MAX)
  33/* "unsigned" is half as wide as ullong */
  34typedef unsigned half_t;
  35#define HALF_MAX UINT_MAX
  36#define HALF_FMT ""
  37#elif ULLONG_MAX == (ULONG_MAX * ULONG_MAX + 2 * ULONG_MAX)
  38/* long is half as wide as ullong */
  39typedef unsigned long half_t;
  40#define HALF_MAX ULONG_MAX
  41#define HALF_FMT "l"
  42#else
  43#error Cant find an integer type which is half as wide as ullong
  44#endif
  45
  46/* The trial divisor increment wheel.  Use it to skip over divisors that
  47 * are composites of 2, 3, 5, 7, or 11.
  48 * Larger wheels improve sieving only slightly, but quickly grow in size
  49 * (adding just one prime, 13, results in 5766 element sieve).
  50 */
  51#define R(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \
  52        (((uint64_t)(a<<0) | (b<<3) | (c<<6) | (d<<9) | (e<<12) | (f<<15) | (g<<18) | (h<<21) | (i<<24) | (j<<27)) << 1) | \
  53        (((uint64_t)(A<<0) | (B<<3) | (C<<6) | (D<<9) | (E<<12) | (F<<15) | (G<<18) | (H<<21) | (I<<24) | (J<<27)) << 31)
  54#define P(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \
  55        R(      (a/2),(b/2),(c/2),(d/2),(e/2),(f/2),(g/2),(h/2),(i/2),(j/2), \
  56                (A/2),(B/2),(C/2),(D/2),(E/2),(F/2),(G/2),(H/2),(I/2),(J/2)  )
  57static const uint64_t packed_wheel[] = {
  58        /*1, 2, 2, 4, 2,*/
  59        P( 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4), //01
  60        P( 2, 4, 2, 4,14, 4, 6, 2,10, 2, 6, 6, 4, 2, 4, 6, 2,10, 2, 4), //02
  61        P( 2,12,10, 2, 4, 2, 4, 6, 2, 6, 4, 6, 6, 6, 2, 6, 4, 2, 6, 4), //03
  62        P( 6, 8, 4, 2, 4, 6, 8, 6,10, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2), //04
  63        P( 6, 4, 2, 6,10, 2,10, 2, 4, 2, 4, 6, 8, 4, 2, 4,12, 2, 6, 4), //05
  64        P( 2, 6, 4, 6,12, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6,10, 2), //06
  65        P( 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 6, 6, 2, 6, 6, 4, 6), //07
  66        P( 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 6, 4, 8, 6, 4, 6, 2, 4, 6), //08
  67        P( 8, 6, 4, 2,10, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 8, 6), //09
  68        P( 4, 2, 4, 6, 6, 2, 6, 4, 8, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4), //10
  69        P( 6, 6, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2), //11
  70        P( 6, 4, 6, 2, 6, 4, 2, 4, 6, 6, 8, 4, 2, 6,10, 8, 4, 2, 4, 2), //12
  71        P( 4, 8,10, 6, 2, 4, 8, 6, 6, 4, 2, 4, 6, 2, 6, 4, 6, 2,10, 2), //13
  72        P(10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 6, 6, 4, 6, 8), //14
  73        P( 4, 2, 4, 2, 4, 8, 6, 4, 8, 4, 6, 2, 6, 6, 4, 2, 4, 6, 8, 4), //15
  74        P( 2, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2,10, 2, 4, 6, 8, 6, 4, 2), //16
  75        P( 6, 4, 6, 8, 4, 6, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6, 6, 4, 6), //17
  76        P( 6, 2, 6, 6, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2, 6, 4, 2,10, 6), //18
  77        P( 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2,12, 6, 4, 6, 2, 4, 6, 2), //19
  78        P(12, 4, 2, 4, 8, 6, 4, 2, 4, 2,10, 2,10, 6, 2, 4, 6, 2, 6, 4), //20
  79        P( 2, 4, 6, 6, 2, 6, 4, 2,10, 6, 8, 6, 4, 2, 4, 8, 6, 4, 6, 2), //21
  80        P( 4, 6, 2, 6, 6, 6, 4, 6, 2, 6, 4, 2, 4, 2,10,12, 2, 4, 2,10), //22
  81        P( 2, 6, 4, 2, 4, 6, 6, 2,10, 2, 6, 4,14, 4, 2, 4, 2, 4, 8, 6), //23
  82        P( 4, 6, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4,12, 2,12), //24
  83};
  84#undef P
  85#undef R
  86#define WHEEL_START 5
  87#define WHEEL_SIZE (5 + 24 * 20)
  88#define square_count (((uint8_t*)&bb_common_bufsiz1)[0])
  89#define wheel_tab    (((uint8_t*)&bb_common_bufsiz1) + 1)
  90/*
  91 * Why, you ask?
  92 * plain byte array:
  93 *      function                old     new   delta
  94 *      wheel_tab                 -     485    +485
  95 * 3-bit-packed insanity:
  96 *      packed_wheel              -     192    +192
  97 *      factor_main             108     171     +63
  98 */
  99static void unpack_wheel(void)
 100{
 101        int i;
 102        uint8_t *p;
 103
 104        setup_common_bufsiz();
 105        wheel_tab[0] = 1;
 106        wheel_tab[1] = 2;
 107        wheel_tab[2] = 2;
 108        wheel_tab[3] = 4;
 109        wheel_tab[4] = 2;
 110        p = &wheel_tab[5];
 111        for (i = 0; i < ARRAY_SIZE(packed_wheel); i++) {
 112                uint64_t v = packed_wheel[i];
 113                while ((v & 0xe) != 0) {
 114                        *p = v & 0xe;
 115                        //printf("%2u,", *p);
 116                        p++;
 117                        v >>= 3;
 118                }
 119                //printf("\n");
 120        }
 121}
 122
 123/* Prevent inlining, factorize() needs all help it can get with reducing register pressure */
 124static NOINLINE void print_w(wide_t n)
 125{
 126        unsigned rep = square_count;
 127        do
 128                printf(" %llu", n);
 129        while (--rep != 0);
 130}
 131static NOINLINE void print_h(half_t n)
 132{
 133        print_w(n);
 134}
 135
 136static void factorize(wide_t N);
 137
 138static half_t isqrt_odd(wide_t N)
 139{
 140        half_t s = isqrt(N);
 141        /* s^2 is <= N, (s+1)^2 > N */
 142
 143        /* If s^2 in fact is EQUAL to N, it's very lucky.
 144         * Examples:
 145         * factor 18446743988964486098 = 2 * 3037000493 * 3037000493
 146         * factor 18446743902517389507 = 3 * 2479700513 * 2479700513
 147         */
 148        if ((wide_t)s * s == N) {
 149                /* factorize sqrt(N), printing each factor twice */
 150                square_count *= 2;
 151                factorize(s);
 152                /* Let caller know we recursed */
 153                return 0;
 154        }
 155
 156        /* Subtract 1 from even s, odd s won't change: */
 157        /* (doesnt work for zero, but we know that s != 0 here) */
 158        s = (s - 1) | 1;
 159        return s;
 160}
 161
 162static NOINLINE void factorize(wide_t N)
 163{
 164        unsigned w;
 165        half_t factor;
 166        half_t max_factor;
 167
 168        if (N < 4)
 169                goto end;
 170
 171        /* The code needs to be optimized for the case where
 172         * there are large prime factors. For example,
 173         * this is not hard:
 174         * 8262075252869367027 = 3 7 17 23 47 101 113 127 131 137 823
 175         *  (the largest divisor to test for largest factor 823
 176         *  is only ~sqrt(823) = 28, the entire factorization needs
 177         *  only ~33 trial divisions)
 178         * but this is:
 179         * 18446744073709551601 = 53 348051774975651917
 180         * the last factor requires testing up to
 181         * 589959129 - about 100 million iterations.
 182         * The slowest case (largest prime) for N < 2^64 is
 183         * factor 18446744073709551557 (0xffffffffffffffc5).
 184         */
 185        max_factor = isqrt_odd(N);
 186        if (!max_factor)
 187                return; /* square was detected and recursively factored */
 188        factor = 2;
 189        w = 0;
 190        for (;;) {
 191                half_t fw;
 192
 193                /* The division is the most costly part of the loop.
 194                 * On 64bit CPUs, takes at best 12 cycles, often ~20.
 195                 */
 196                while ((N % factor) == 0) { /* not likely */
 197                        N = N / factor;
 198                        print_h(factor);
 199                        max_factor = isqrt_odd(N);
 200                        if (!max_factor)
 201                                return; /* square was detected */
 202                }
 203                if (factor >= max_factor)
 204                        break;
 205                fw = factor + wheel_tab[w];
 206                if (fw < factor)
 207                        break; /* overflow */
 208                factor = fw;
 209                w++;
 210                if (w < WHEEL_SIZE)
 211                        continue;
 212                w = WHEEL_START;
 213        }
 214 end:
 215        if (N > 1)
 216                print_w(N);
 217        bb_putchar('\n');
 218}
 219
 220static void factorize_numstr(const char *numstr)
 221{
 222        wide_t N;
 223
 224        /* Leading + is ok (coreutils compat) */
 225        if (*numstr == '+')
 226                numstr++;
 227        N = bb_strtoull(numstr, NULL, 10);
 228        if (errno)
 229                bb_show_usage();
 230        printf("%llu:", N);
 231        square_count = 1;
 232        factorize(N);
 233}
 234
 235int factor_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 236int factor_main(int argc UNUSED_PARAM, char **argv)
 237{
 238        unpack_wheel();
 239
 240        //// coreutils has undocumented option ---debug (three dashes)
 241        //getopt32(argv, "");
 242        //argv += optind;
 243        argv++;
 244
 245        if (!*argv) {
 246                /* Read from stdin, several numbers per line are accepted */
 247                for (;;) {
 248                        char *numstr, *line;
 249                        line = xmalloc_fgetline(stdin);
 250                        if (!line)
 251                                return EXIT_SUCCESS;
 252                        numstr = line;
 253                        for (;;) {
 254                                char *end;
 255                                numstr = skip_whitespace(numstr);
 256                                if (!numstr[0])
 257                                        break;
 258                                end = skip_non_whitespace(numstr);
 259                                if (*end != '\0')
 260                                        *end++ = '\0';
 261                                factorize_numstr(numstr);
 262                                numstr = end;
 263                        }
 264                        free(line);
 265                }
 266        }
 267
 268        do {
 269                /* Leading spaces are ok (coreutils compat) */
 270                factorize_numstr(skip_whitespace(*argv));
 271        } while (*++argv);
 272
 273        return EXIT_SUCCESS;
 274}
 275