linux/scripts/kallsyms.c
<<
>>
Prefs
   1/* Generate assembler source containing symbol information
   2 *
   3 * Copyright 2002       by Kai Germaschewski
   4 *
   5 * This software may be used and distributed according to the terms
   6 * of the GNU General Public License, incorporated herein by reference.
   7 *
   8 * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S
   9 *
  10 * ChangeLog:
  11 *
  12 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
  13 *      Changed the compression method from stem compression to "table lookup"
  14 *      compression
  15 *
  16 *      Table compression uses all the unused char codes on the symbols and
  17 *  maps these to the most used substrings (tokens). For instance, it might
  18 *  map char code 0xF7 to represent "write_" and then in every symbol where
  19 *  "write_" appears it can be replaced by 0xF7, saving 5 bytes.
  20 *      The used codes themselves are also placed in the table so that the
  21 *  decompression can work without "special cases".
  22 *      Applied to kernel symbols, this usually produces a compression ratio
  23 *  of about 50%.
  24 *
  25 */
  26
  27#include <stdio.h>
  28#include <stdlib.h>
  29#include <string.h>
  30#include <ctype.h>
  31
  /* Size of the scratch buffer used when expanding tokens for output. */
#define KSYM_NAME_LEN           128


/* One symbol taken from the input map. */
struct sym_entry {
        unsigned long long addr;        /* address parsed from the input line */
        unsigned int len;               /* number of bytes currently stored in sym */
        unsigned char *sym;             /* type character followed by the name bytes */
};


/* Growable symbol array: table_size slots allocated, table_cnt in use. */
static struct sym_entry *table;
static unsigned int table_size;
static unsigned int table_cnt;

/* Addresses of the section marker symbols seen in the input. */
static unsigned long long _text;
static unsigned long long _stext, _etext;
static unsigned long long _sinittext, _einittext;
static unsigned long long _sextratext, _eextratext;

/* Command line settings (both start out as zero). */
static int all_symbols;
static char symbol_prefix_char;

/* Occurrence counter for every two-byte token: index is first + (second << 8). */
int token_profit[0x10000];

/* the table that holds the result of the compression */
unsigned char best_table[256][2];
unsigned char best_table_len[256];
  53
  54
  /* Print the command line synopsis on stderr and terminate with status 1. */
static void usage(void)
{
        fputs("Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n",
              stderr);
        exit(1);
}
  60
  /*
   * This ignores the intensely annoying "mapping symbols" found
   * in ARM ELF files: $a, $t and $d.
   *
   * Returns 1 when str is "$a", "$t" or "$d", optionally followed by a
   * ".suffix", and 0 otherwise.
   *
   * The second character is checked against '\0' explicitly: strchr()
   * treats the terminator as part of the searched string, so without the
   * check the name "$" would be accepted by strchr() and str[2] would be
   * read from beyond the end of the string.
   */
static inline int is_arm_mapping_symbol(const char *str)
{
        if (str[0] != '$' || str[1] == '\0')
                return 0;
        if (!strchr("atd", str[1]))
                return 0;
        return str[2] == '\0' || str[2] == '.';
}
  70
  71static int read_symbol(FILE *in, struct sym_entry *s)
  72{
  73        char str[500];
  74        char *sym, stype;
  75        int rc;
  76
  77        rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
  78        if (rc != 3) {
  79                if (rc != EOF) {
  80                        /* skip line */
  81                        fgets(str, 500, in);
  82                }
  83                return -1;
  84        }
  85
  86        sym = str;
  87        /* skip prefix char */
  88        if (symbol_prefix_char && str[0] == symbol_prefix_char)
  89                sym++;
  90
  91        /* Ignore most absolute/undefined (?) symbols. */
  92        if (strcmp(sym, "_text") == 0)
  93                _text = s->addr;
  94        else if (strcmp(sym, "_stext") == 0)
  95                _stext = s->addr;
  96        else if (strcmp(sym, "_etext") == 0)
  97                _etext = s->addr;
  98        else if (strcmp(sym, "_sinittext") == 0)
  99                _sinittext = s->addr;
 100        else if (strcmp(sym, "_einittext") == 0)
 101                _einittext = s->addr;
 102        else if (strcmp(sym, "_sextratext") == 0)
 103                _sextratext = s->addr;
 104        else if (strcmp(sym, "_eextratext") == 0)
 105                _eextratext = s->addr;
 106        else if (toupper(stype) == 'A')
 107        {
 108                /* Keep these useful absolute symbols */
 109                if (strcmp(sym, "__kernel_syscall_via_break") &&
 110                    strcmp(sym, "__kernel_syscall_via_epc") &&
 111                    strcmp(sym, "__kernel_sigtramp") &&
 112                    strcmp(sym, "__gp"))
 113                        return -1;
 114
 115        }
 116        else if (toupper(stype) == 'U' ||
 117                 is_arm_mapping_symbol(sym))
 118                return -1;
 119        /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
 120        else if (str[0] == '$')
 121                return -1;
 122
 123        /* include the type field in the symbol name, so that it gets
 124         * compressed together */
 125        s->len = strlen(str) + 1;
 126        s->sym = malloc(s->len + 1);
 127        if (!s->sym) {
 128                fprintf(stderr, "kallsyms failure: "
 129                        "unable to allocate required amount of memory\n");
 130                exit(EXIT_FAILURE);
 131        }
 132        strcpy((char *)s->sym + 1, str);
 133        s->sym[0] = stype;
 134
 135        return 0;
 136}
 137
 138static int symbol_valid(struct sym_entry *s)
 139{
 140        /* Symbols which vary between passes.  Passes 1 and 2 must have
 141         * identical symbol lists.  The kallsyms_* symbols below are only added
 142         * after pass 1, they would be included in pass 2 when --all-symbols is
 143         * specified so exclude them to get a stable symbol list.
 144         */
 145        static char *special_symbols[] = {
 146                "kallsyms_addresses",
 147                "kallsyms_num_syms",
 148                "kallsyms_names",
 149                "kallsyms_markers",
 150                "kallsyms_token_table",
 151                "kallsyms_token_index",
 152
 153        /* Exclude linker generated symbols which vary between passes */
 154                "_SDA_BASE_",           /* ppc */
 155                "_SDA2_BASE_",          /* ppc */
 156                NULL };
 157        int i;
 158        int offset = 1;
 159
 160        /* skip prefix char */
 161        if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
 162                offset++;
 163
 164        /* if --all-symbols is not specified, then symbols outside the text
 165         * and inittext sections are discarded */
 166        if (!all_symbols) {
 167                if ((s->addr < _stext || s->addr > _etext)
 168                    && (s->addr < _sinittext || s->addr > _einittext)
 169                    && (s->addr < _sextratext || s->addr > _eextratext))
 170                        return 0;
 171                /* Corner case.  Discard any symbols with the same value as
 172                 * _etext _einittext or _eextratext; they can move between pass
 173                 * 1 and 2 when the kallsyms data are added.  If these symbols
 174                 * move then they may get dropped in pass 2, which breaks the
 175                 * kallsyms rules.
 176                 */
 177                if ((s->addr == _etext && strcmp((char*)s->sym + offset, "_etext")) ||
 178                    (s->addr == _einittext && strcmp((char*)s->sym + offset, "_einittext")) ||
 179                    (s->addr == _eextratext && strcmp((char*)s->sym + offset, "_eextratext")))
 180                        return 0;
 181        }
 182
 183        /* Exclude symbols which vary between passes. */
 184        if (strstr((char *)s->sym + offset, "_compiled."))
 185                return 0;
 186
 187        for (i = 0; special_symbols[i]; i++)
 188                if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
 189                        return 0;
 190
 191        return 1;
 192}
 193
 194static void read_map(FILE *in)
 195{
 196        while (!feof(in)) {
 197                if (table_cnt >= table_size) {
 198                        table_size += 10000;
 199                        table = realloc(table, sizeof(*table) * table_size);
 200                        if (!table) {
 201                                fprintf(stderr, "out of memory\n");
 202                                exit (1);
 203                        }
 204                }
 205                if (read_symbol(in, &table[table_cnt]) == 0)
 206                        table_cnt++;
 207        }
 208}
 209
 210static void output_label(char *label)
 211{
 212        if (symbol_prefix_char)
 213                printf(".globl %c%s\n", symbol_prefix_char, label);
 214        else
 215                printf(".globl %s\n", label);
 216        printf("\tALGN\n");
 217        if (symbol_prefix_char)
 218                printf("%c%s:\n", symbol_prefix_char, label);
 219        else
 220                printf("%s:\n", label);
 221}
 222
 223/* uncompress a compressed symbol. When this function is called, the best table
 224 * might still be compressed itself, so the function needs to be recursive */
 225static int expand_symbol(unsigned char *data, int len, char *result)
 226{
 227        int c, rlen, total=0;
 228
 229        while (len) {
 230                c = *data;
 231                /* if the table holds a single char that is the same as the one
 232                 * we are looking for, then end the search */
 233                if (best_table[c][0]==c && best_table_len[c]==1) {
 234                        *result++ = c;
 235                        total++;
 236                } else {
 237                        /* if not, recurse and expand */
 238                        rlen = expand_symbol(best_table[c], best_table_len[c], result);
 239                        total += rlen;
 240                        result += rlen;
 241                }
 242                data++;
 243                len--;
 244        }
 245        *result=0;
 246
 247        return total;
 248}
 249
 250static void write_src(void)
 251{
 252        unsigned int i, k, off;
 253        unsigned int best_idx[256];
 254        unsigned int *markers;
 255        char buf[KSYM_NAME_LEN];
 256
 257        printf("#include <asm/types.h>\n");
 258        printf("#if BITS_PER_LONG == 64\n");
 259        printf("#define PTR .quad\n");
 260        printf("#define ALGN .align 8\n");
 261        printf("#else\n");
 262        printf("#define PTR .long\n");
 263        printf("#define ALGN .align 4\n");
 264        printf("#endif\n");
 265
 266        printf("\t.section .rodata, \"a\"\n");
 267
 268        /* Provide proper symbols relocatability by their '_text'
 269         * relativeness.  The symbol names cannot be used to construct
 270         * normal symbol references as the list of symbols contains
 271         * symbols that are declared static and are private to their
 272         * .o files.  This prevents .tmp_kallsyms.o or any other
 273         * object from referencing them.
 274         */
 275        output_label("kallsyms_addresses");
 276        for (i = 0; i < table_cnt; i++) {
 277                if (toupper(table[i].sym[0]) != 'A') {
 278                        if (_text <= table[i].addr)
 279                                printf("\tPTR\t_text + %#llx\n",
 280                                        table[i].addr - _text);
 281                        else
 282                                printf("\tPTR\t_text - %#llx\n",
 283                                        _text - table[i].addr);
 284                } else {
 285                        printf("\tPTR\t%#llx\n", table[i].addr);
 286                }
 287        }
 288        printf("\n");
 289
 290        output_label("kallsyms_num_syms");
 291        printf("\tPTR\t%d\n", table_cnt);
 292        printf("\n");
 293
 294        /* table of offset markers, that give the offset in the compressed stream
 295         * every 256 symbols */
 296        markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
 297        if (!markers) {
 298                fprintf(stderr, "kallsyms failure: "
 299                        "unable to allocate required memory\n");
 300                exit(EXIT_FAILURE);
 301        }
 302
 303        output_label("kallsyms_names");
 304        off = 0;
 305        for (i = 0; i < table_cnt; i++) {
 306                if ((i & 0xFF) == 0)
 307                        markers[i >> 8] = off;
 308
 309                printf("\t.byte 0x%02x", table[i].len);
 310                for (k = 0; k < table[i].len; k++)
 311                        printf(", 0x%02x", table[i].sym[k]);
 312                printf("\n");
 313
 314                off += table[i].len + 1;
 315        }
 316        printf("\n");
 317
 318        output_label("kallsyms_markers");
 319        for (i = 0; i < ((table_cnt + 255) >> 8); i++)
 320                printf("\tPTR\t%d\n", markers[i]);
 321        printf("\n");
 322
 323        free(markers);
 324
 325        output_label("kallsyms_token_table");
 326        off = 0;
 327        for (i = 0; i < 256; i++) {
 328                best_idx[i] = off;
 329                expand_symbol(best_table[i], best_table_len[i], buf);
 330                printf("\t.asciz\t\"%s\"\n", buf);
 331                off += strlen(buf) + 1;
 332        }
 333        printf("\n");
 334
 335        output_label("kallsyms_token_index");
 336        for (i = 0; i < 256; i++)
 337                printf("\t.short\t%d\n", best_idx[i]);
 338        printf("\n");
 339}
 340
 341
 342/* table lookup compression functions */
 343
 344/* count all the possible tokens in a symbol */
 345static void learn_symbol(unsigned char *symbol, int len)
 346{
 347        int i;
 348
 349        for (i = 0; i < len - 1; i++)
 350                token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
 351}
 352
 353/* decrease the count for all the possible tokens in a symbol */
 354static void forget_symbol(unsigned char *symbol, int len)
 355{
 356        int i;
 357
 358        for (i = 0; i < len - 1; i++)
 359                token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
 360}
 361
 362/* remove all the invalid symbols from the table and do the initial token count */
 363static void build_initial_tok_table(void)
 364{
 365        unsigned int i, pos;
 366
 367        pos = 0;
 368        for (i = 0; i < table_cnt; i++) {
 369                if ( symbol_valid(&table[i]) ) {
 370                        if (pos != i)
 371                                table[pos] = table[i];
 372                        learn_symbol(table[pos].sym, table[pos].len);
 373                        pos++;
 374                }
 375        }
 376        table_cnt = pos;
 377}
 378
 /*
  * Find the first place in str[0..len-1] where the two bytes of token occur
  * next to each other.
  *
  * Returns a pointer to the first byte of the match, or NULL if there is
  * none (always the case when len is less than 2).
  */
static void *find_token(unsigned char *str, int len, unsigned char *token)
{
        unsigned char *cur = str;
        int pairs_left;

        for (pairs_left = len - 1; pairs_left > 0; pairs_left--, cur++) {
                if (cur[0] == token[0] && cur[1] == token[1])
                        return cur;
        }
        return NULL;
}
 389
 390/* replace a given token in all the valid symbols. Use the sampled symbols
 391 * to update the counts */
 392static void compress_symbols(unsigned char *str, int idx)
 393{
 394        unsigned int i, len, size;
 395        unsigned char *p1, *p2;
 396
 397        for (i = 0; i < table_cnt; i++) {
 398
 399                len = table[i].len;
 400                p1 = table[i].sym;
 401
 402                /* find the token on the symbol */
 403                p2 = find_token(p1, len, str);
 404                if (!p2) continue;
 405
 406                /* decrease the counts for this symbol's tokens */
 407                forget_symbol(table[i].sym, len);
 408
 409                size = len;
 410
 411                do {
 412                        *p2 = idx;
 413                        p2++;
 414                        size -= (p2 - p1);
 415                        memmove(p2, p2 + 1, size);
 416                        p1 = p2;
 417                        len--;
 418
 419                        if (size < 2) break;
 420
 421                        /* find the token on the symbol */
 422                        p2 = find_token(p1, size, str);
 423
 424                } while (p2);
 425
 426                table[i].len = len;
 427
 428                /* increase the counts for this symbol's new tokens */
 429                learn_symbol(table[i].sym, len);
 430        }
 431}
 432
 433/* search the token with the maximum profit */
 434static int find_best_token(void)
 435{
 436        int i, best, bestprofit;
 437
 438        bestprofit=-10000;
 439        best = 0;
 440
 441        for (i = 0; i < 0x10000; i++) {
 442                if (token_profit[i] > bestprofit) {
 443                        best = i;
 444                        bestprofit = token_profit[i];
 445                }
 446        }
 447        return best;
 448}
 449
 450/* this is the core of the algorithm: calculate the "best" table */
 451static void optimize_result(void)
 452{
 453        int i, best;
 454
 455        /* using the '\0' symbol last allows compress_symbols to use standard
 456         * fast string functions */
 457        for (i = 255; i >= 0; i--) {
 458
 459                /* if this table slot is empty (it is not used by an actual
 460                 * original char code */
 461                if (!best_table_len[i]) {
 462
 463                        /* find the token with the breates profit value */
 464                        best = find_best_token();
 465
 466                        /* place it in the "best" table */
 467                        best_table_len[i] = 2;
 468                        best_table[i][0] = best & 0xFF;
 469                        best_table[i][1] = (best >> 8) & 0xFF;
 470
 471                        /* replace this token in all the valid symbols */
 472                        compress_symbols(best_table[i], i);
 473                }
 474        }
 475}
 476
 477/* start by placing the symbols that are actually used on the table */
 478static void insert_real_symbols_in_table(void)
 479{
 480        unsigned int i, j, c;
 481
 482        memset(best_table, 0, sizeof(best_table));
 483        memset(best_table_len, 0, sizeof(best_table_len));
 484
 485        for (i = 0; i < table_cnt; i++) {
 486                for (j = 0; j < table[i].len; j++) {
 487                        c = table[i].sym[j];
 488                        best_table[c][0]=c;
 489                        best_table_len[c]=1;
 490                }
 491        }
 492}
 493
 494static void optimize_token_table(void)
 495{
 496        build_initial_tok_table();
 497
 498        insert_real_symbols_in_table();
 499
 500        /* When valid symbol is not registered, exit to error */
 501        if (!table_cnt) {
 502                fprintf(stderr, "No valid symbol.\n");
 503                exit(1);
 504        }
 505
 506        optimize_result();
 507}
 508
 509
 510int main(int argc, char **argv)
 511{
 512        if (argc >= 2) {
 513                int i;
 514                for (i = 1; i < argc; i++) {
 515                        if(strcmp(argv[i], "--all-symbols") == 0)
 516                                all_symbols = 1;
 517                        else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
 518                                char *p = &argv[i][16];
 519                                /* skip quote */
 520                                if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
 521                                        p++;
 522                                symbol_prefix_char = *p;
 523                        } else
 524                                usage();
 525                }
 526        } else if (argc != 1)
 527                usage();
 528
 529        read_map(stdin);
 530        optimize_token_table();
 531        write_src();
 532
 533        return 0;
 534}
 535