linux/arch/x86/lib/insn.c
<<
>>
Prefs
   1/*
   2 * x86 instruction analysis
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 *
  18 * Copyright (C) IBM Corporation, 2002, 2004, 2009
  19 */
  20
  21#ifdef __KERNEL__
  22#include <linux/string.h>
  23#else
  24#include <string.h>
  25#endif
  26#include <asm/inat.h>
  27#include <asm/insn.h>
  28
  29/* Verify next sizeof(t) bytes can be on the same instruction */
  30#define validate_next(t, insn, n)       \
  31        ((insn)->next_byte + sizeof(t) + n - (insn)->kaddr <= MAX_INSN_SIZE)
  32
  33#define __get_next(t, insn)     \
  34        ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
  35
  36#define __peek_nbyte_next(t, insn, n)   \
  37        ({ t r = *(t*)((insn)->next_byte + n); r; })
  38
  39#define get_next(t, insn)       \
  40        ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
  41
  42#define peek_nbyte_next(t, insn, n)     \
  43        ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
  44
  45#define peek_next(t, insn)      peek_nbyte_next(t, insn, 0)
  46
  47/**
  48 * insn_init() - initialize struct insn
  49 * @insn:       &struct insn to be initialized
  50 * @kaddr:      address (in kernel memory) of instruction (or copy thereof)
  51 * @x86_64:     !0 for 64-bit kernel or 64-bit app
  52 */
  53void insn_init(struct insn *insn, const void *kaddr, int x86_64)
  54{
  55        memset(insn, 0, sizeof(*insn));
  56        insn->kaddr = kaddr;
  57        insn->next_byte = kaddr;
  58        insn->x86_64 = x86_64 ? 1 : 0;
  59        insn->opnd_bytes = 4;
  60        if (x86_64)
  61                insn->addr_bytes = 8;
  62        else
  63                insn->addr_bytes = 4;
  64}
  65
  66/**
  67 * insn_get_prefixes - scan x86 instruction prefix bytes
  68 * @insn:       &struct insn containing instruction
  69 *
  70 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
  71 * to point to the (first) opcode.  No effect if @insn->prefixes.got
  72 * is already set.
  73 */
  74void insn_get_prefixes(struct insn *insn)
  75{
  76        struct insn_field *prefixes = &insn->prefixes;
  77        insn_attr_t attr;
  78        insn_byte_t b, lb;
  79        int i, nb;
  80
  81        if (prefixes->got)
  82                return;
  83
  84        nb = 0;
  85        lb = 0;
  86        b = peek_next(insn_byte_t, insn);
  87        attr = inat_get_opcode_attribute(b);
  88        while (inat_is_legacy_prefix(attr)) {
  89                /* Skip if same prefix */
  90                for (i = 0; i < nb; i++)
  91                        if (prefixes->bytes[i] == b)
  92                                goto found;
  93                if (nb == 4)
  94                        /* Invalid instruction */
  95                        break;
  96                prefixes->bytes[nb++] = b;
  97                if (inat_is_address_size_prefix(attr)) {
  98                        /* address size switches 2/4 or 4/8 */
  99                        if (insn->x86_64)
 100                                insn->addr_bytes ^= 12;
 101                        else
 102                                insn->addr_bytes ^= 6;
 103                } else if (inat_is_operand_size_prefix(attr)) {
 104                        /* oprand size switches 2/4 */
 105                        insn->opnd_bytes ^= 6;
 106                }
 107found:
 108                prefixes->nbytes++;
 109                insn->next_byte++;
 110                lb = b;
 111                b = peek_next(insn_byte_t, insn);
 112                attr = inat_get_opcode_attribute(b);
 113        }
 114        /* Set the last prefix */
 115        if (lb && lb != insn->prefixes.bytes[3]) {
 116                if (unlikely(insn->prefixes.bytes[3])) {
 117                        /* Swap the last prefix */
 118                        b = insn->prefixes.bytes[3];
 119                        for (i = 0; i < nb; i++)
 120                                if (prefixes->bytes[i] == lb)
 121                                        prefixes->bytes[i] = b;
 122                }
 123                insn->prefixes.bytes[3] = lb;
 124        }
 125
 126        /* Decode REX prefix */
 127        if (insn->x86_64) {
 128                b = peek_next(insn_byte_t, insn);
 129                attr = inat_get_opcode_attribute(b);
 130                if (inat_is_rex_prefix(attr)) {
 131                        insn->rex_prefix.value = b;
 132                        insn->rex_prefix.nbytes = 1;
 133                        insn->next_byte++;
 134                        if (X86_REX_W(b))
 135                                /* REX.W overrides opnd_size */
 136                                insn->opnd_bytes = 8;
 137                }
 138        }
 139        insn->rex_prefix.got = 1;
 140
 141        /* Decode VEX prefix */
 142        b = peek_next(insn_byte_t, insn);
 143        attr = inat_get_opcode_attribute(b);
 144        if (inat_is_vex_prefix(attr)) {
 145                insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
 146                if (!insn->x86_64) {
 147                        /*
 148                         * In 32-bits mode, if the [7:6] bits (mod bits of
 149                         * ModRM) on the second byte are not 11b, it is
 150                         * LDS or LES.
 151                         */
 152                        if (X86_MODRM_MOD(b2) != 3)
 153                                goto vex_end;
 154                }
 155                insn->vex_prefix.bytes[0] = b;
 156                insn->vex_prefix.bytes[1] = b2;
 157                if (inat_is_vex3_prefix(attr)) {
 158                        b2 = peek_nbyte_next(insn_byte_t, insn, 2);
 159                        insn->vex_prefix.bytes[2] = b2;
 160                        insn->vex_prefix.nbytes = 3;
 161                        insn->next_byte += 3;
 162                        if (insn->x86_64 && X86_VEX_W(b2))
 163                                /* VEX.W overrides opnd_size */
 164                                insn->opnd_bytes = 8;
 165                } else {
 166                        insn->vex_prefix.nbytes = 2;
 167                        insn->next_byte += 2;
 168                }
 169        }
 170vex_end:
 171        insn->vex_prefix.got = 1;
 172
 173        prefixes->got = 1;
 174
 175err_out:
 176        return;
 177}
 178
 179/**
 180 * insn_get_opcode - collect opcode(s)
 181 * @insn:       &struct insn containing instruction
 182 *
 183 * Populates @insn->opcode, updates @insn->next_byte to point past the
 184 * opcode byte(s), and set @insn->attr (except for groups).
 185 * If necessary, first collects any preceding (prefix) bytes.
 186 * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
 187 * is already 1.
 188 */
 189void insn_get_opcode(struct insn *insn)
 190{
 191        struct insn_field *opcode = &insn->opcode;
 192        insn_byte_t op;
 193        int pfx_id;
 194        if (opcode->got)
 195                return;
 196        if (!insn->prefixes.got)
 197                insn_get_prefixes(insn);
 198
 199        /* Get first opcode */
 200        op = get_next(insn_byte_t, insn);
 201        opcode->bytes[0] = op;
 202        opcode->nbytes = 1;
 203
 204        /* Check if there is VEX prefix or not */
 205        if (insn_is_avx(insn)) {
 206                insn_byte_t m, p;
 207                m = insn_vex_m_bits(insn);
 208                p = insn_vex_p_bits(insn);
 209                insn->attr = inat_get_avx_attribute(op, m, p);
 210                if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
 211                        insn->attr = 0; /* This instruction is bad */
 212                goto end;       /* VEX has only 1 byte for opcode */
 213        }
 214
 215        insn->attr = inat_get_opcode_attribute(op);
 216        while (inat_is_escape(insn->attr)) {
 217                /* Get escaped opcode */
 218                op = get_next(insn_byte_t, insn);
 219                opcode->bytes[opcode->nbytes++] = op;
 220                pfx_id = insn_last_prefix_id(insn);
 221                insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
 222        }
 223        if (inat_must_vex(insn->attr))
 224                insn->attr = 0; /* This instruction is bad */
 225end:
 226        opcode->got = 1;
 227
 228err_out:
 229        return;
 230}
 231
 232/**
 233 * insn_get_modrm - collect ModRM byte, if any
 234 * @insn:       &struct insn containing instruction
 235 *
 236 * Populates @insn->modrm and updates @insn->next_byte to point past the
 237 * ModRM byte, if any.  If necessary, first collects the preceding bytes
 238 * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
 239 */
 240void insn_get_modrm(struct insn *insn)
 241{
 242        struct insn_field *modrm = &insn->modrm;
 243        insn_byte_t pfx_id, mod;
 244        if (modrm->got)
 245                return;
 246        if (!insn->opcode.got)
 247                insn_get_opcode(insn);
 248
 249        if (inat_has_modrm(insn->attr)) {
 250                mod = get_next(insn_byte_t, insn);
 251                modrm->value = mod;
 252                modrm->nbytes = 1;
 253                if (inat_is_group(insn->attr)) {
 254                        pfx_id = insn_last_prefix_id(insn);
 255                        insn->attr = inat_get_group_attribute(mod, pfx_id,
 256                                                              insn->attr);
 257                        if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
 258                                insn->attr = 0; /* This is bad */
 259                }
 260        }
 261
 262        if (insn->x86_64 && inat_is_force64(insn->attr))
 263                insn->opnd_bytes = 8;
 264        modrm->got = 1;
 265
 266err_out:
 267        return;
 268}
 269
 270
 271/**
 272 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
 273 * @insn:       &struct insn containing instruction
 274 *
 275 * If necessary, first collects the instruction up to and including the
 276 * ModRM byte.  No effect if @insn->x86_64 is 0.
 277 */
 278int insn_rip_relative(struct insn *insn)
 279{
 280        struct insn_field *modrm = &insn->modrm;
 281
 282        if (!insn->x86_64)
 283                return 0;
 284        if (!modrm->got)
 285                insn_get_modrm(insn);
 286        /*
 287         * For rip-relative instructions, the mod field (top 2 bits)
 288         * is zero and the r/m field (bottom 3 bits) is 0x5.
 289         */
 290        return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
 291}
 292
 293/**
 294 * insn_get_sib() - Get the SIB byte of instruction
 295 * @insn:       &struct insn containing instruction
 296 *
 297 * If necessary, first collects the instruction up to and including the
 298 * ModRM byte.
 299 */
 300void insn_get_sib(struct insn *insn)
 301{
 302        insn_byte_t modrm;
 303
 304        if (insn->sib.got)
 305                return;
 306        if (!insn->modrm.got)
 307                insn_get_modrm(insn);
 308        if (insn->modrm.nbytes) {
 309                modrm = (insn_byte_t)insn->modrm.value;
 310                if (insn->addr_bytes != 2 &&
 311                    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
 312                        insn->sib.value = get_next(insn_byte_t, insn);
 313                        insn->sib.nbytes = 1;
 314                }
 315        }
 316        insn->sib.got = 1;
 317
 318err_out:
 319        return;
 320}
 321
 322
 323/**
 324 * insn_get_displacement() - Get the displacement of instruction
 325 * @insn:       &struct insn containing instruction
 326 *
 327 * If necessary, first collects the instruction up to and including the
 328 * SIB byte.
 329 * Displacement value is sign-expanded.
 330 */
 331void insn_get_displacement(struct insn *insn)
 332{
 333        insn_byte_t mod, rm, base;
 334
 335        if (insn->displacement.got)
 336                return;
 337        if (!insn->sib.got)
 338                insn_get_sib(insn);
 339        if (insn->modrm.nbytes) {
 340                /*
 341                 * Interpreting the modrm byte:
 342                 * mod = 00 - no displacement fields (exceptions below)
 343                 * mod = 01 - 1-byte displacement field
 344                 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
 345                 *      address size = 2 (0x67 prefix in 32-bit mode)
 346                 * mod = 11 - no memory operand
 347                 *
 348                 * If address size = 2...
 349                 * mod = 00, r/m = 110 - displacement field is 2 bytes
 350                 *
 351                 * If address size != 2...
 352                 * mod != 11, r/m = 100 - SIB byte exists
 353                 * mod = 00, SIB base = 101 - displacement field is 4 bytes
 354                 * mod = 00, r/m = 101 - rip-relative addressing, displacement
 355                 *      field is 4 bytes
 356                 */
 357                mod = X86_MODRM_MOD(insn->modrm.value);
 358                rm = X86_MODRM_RM(insn->modrm.value);
 359                base = X86_SIB_BASE(insn->sib.value);
 360                if (mod == 3)
 361                        goto out;
 362                if (mod == 1) {
 363                        insn->displacement.value = get_next(char, insn);
 364                        insn->displacement.nbytes = 1;
 365                } else if (insn->addr_bytes == 2) {
 366                        if ((mod == 0 && rm == 6) || mod == 2) {
 367                                insn->displacement.value =
 368                                         get_next(short, insn);
 369                                insn->displacement.nbytes = 2;
 370                        }
 371                } else {
 372                        if ((mod == 0 && rm == 5) || mod == 2 ||
 373                            (mod == 0 && base == 5)) {
 374                                insn->displacement.value = get_next(int, insn);
 375                                insn->displacement.nbytes = 4;
 376                        }
 377                }
 378        }
 379out:
 380        insn->displacement.got = 1;
 381
 382err_out:
 383        return;
 384}
 385
 386/* Decode moffset16/32/64. Return 0 if failed */
 387static int __get_moffset(struct insn *insn)
 388{
 389        switch (insn->addr_bytes) {
 390        case 2:
 391                insn->moffset1.value = get_next(short, insn);
 392                insn->moffset1.nbytes = 2;
 393                break;
 394        case 4:
 395                insn->moffset1.value = get_next(int, insn);
 396                insn->moffset1.nbytes = 4;
 397                break;
 398        case 8:
 399                insn->moffset1.value = get_next(int, insn);
 400                insn->moffset1.nbytes = 4;
 401                insn->moffset2.value = get_next(int, insn);
 402                insn->moffset2.nbytes = 4;
 403                break;
 404        default:        /* opnd_bytes must be modified manually */
 405                goto err_out;
 406        }
 407        insn->moffset1.got = insn->moffset2.got = 1;
 408
 409        return 1;
 410
 411err_out:
 412        return 0;
 413}
 414
 415/* Decode imm v32(Iz). Return 0 if failed */
 416static int __get_immv32(struct insn *insn)
 417{
 418        switch (insn->opnd_bytes) {
 419        case 2:
 420                insn->immediate.value = get_next(short, insn);
 421                insn->immediate.nbytes = 2;
 422                break;
 423        case 4:
 424        case 8:
 425                insn->immediate.value = get_next(int, insn);
 426                insn->immediate.nbytes = 4;
 427                break;
 428        default:        /* opnd_bytes must be modified manually */
 429                goto err_out;
 430        }
 431
 432        return 1;
 433
 434err_out:
 435        return 0;
 436}
 437
 438/* Decode imm v64(Iv/Ov), Return 0 if failed */
 439static int __get_immv(struct insn *insn)
 440{
 441        switch (insn->opnd_bytes) {
 442        case 2:
 443                insn->immediate1.value = get_next(short, insn);
 444                insn->immediate1.nbytes = 2;
 445                break;
 446        case 4:
 447                insn->immediate1.value = get_next(int, insn);
 448                insn->immediate1.nbytes = 4;
 449                break;
 450        case 8:
 451                insn->immediate1.value = get_next(int, insn);
 452                insn->immediate1.nbytes = 4;
 453                insn->immediate2.value = get_next(int, insn);
 454                insn->immediate2.nbytes = 4;
 455                break;
 456        default:        /* opnd_bytes must be modified manually */
 457                goto err_out;
 458        }
 459        insn->immediate1.got = insn->immediate2.got = 1;
 460
 461        return 1;
 462err_out:
 463        return 0;
 464}
 465
 466/* Decode ptr16:16/32(Ap) */
 467static int __get_immptr(struct insn *insn)
 468{
 469        switch (insn->opnd_bytes) {
 470        case 2:
 471                insn->immediate1.value = get_next(short, insn);
 472                insn->immediate1.nbytes = 2;
 473                break;
 474        case 4:
 475                insn->immediate1.value = get_next(int, insn);
 476                insn->immediate1.nbytes = 4;
 477                break;
 478        case 8:
 479                /* ptr16:64 is not exist (no segment) */
 480                return 0;
 481        default:        /* opnd_bytes must be modified manually */
 482                goto err_out;
 483        }
 484        insn->immediate2.value = get_next(unsigned short, insn);
 485        insn->immediate2.nbytes = 2;
 486        insn->immediate1.got = insn->immediate2.got = 1;
 487
 488        return 1;
 489err_out:
 490        return 0;
 491}
 492
 493/**
 494 * insn_get_immediate() - Get the immediates of instruction
 495 * @insn:       &struct insn containing instruction
 496 *
 497 * If necessary, first collects the instruction up to and including the
 498 * displacement bytes.
 499 * Basically, most of immediates are sign-expanded. Unsigned-value can be
 500 * get by bit masking with ((1 << (nbytes * 8)) - 1)
 501 */
 502void insn_get_immediate(struct insn *insn)
 503{
 504        if (insn->immediate.got)
 505                return;
 506        if (!insn->displacement.got)
 507                insn_get_displacement(insn);
 508
 509        if (inat_has_moffset(insn->attr)) {
 510                if (!__get_moffset(insn))
 511                        goto err_out;
 512                goto done;
 513        }
 514
 515        if (!inat_has_immediate(insn->attr))
 516                /* no immediates */
 517                goto done;
 518
 519        switch (inat_immediate_size(insn->attr)) {
 520        case INAT_IMM_BYTE:
 521                insn->immediate.value = get_next(char, insn);
 522                insn->immediate.nbytes = 1;
 523                break;
 524        case INAT_IMM_WORD:
 525                insn->immediate.value = get_next(short, insn);
 526                insn->immediate.nbytes = 2;
 527                break;
 528        case INAT_IMM_DWORD:
 529                insn->immediate.value = get_next(int, insn);
 530                insn->immediate.nbytes = 4;
 531                break;
 532        case INAT_IMM_QWORD:
 533                insn->immediate1.value = get_next(int, insn);
 534                insn->immediate1.nbytes = 4;
 535                insn->immediate2.value = get_next(int, insn);
 536                insn->immediate2.nbytes = 4;
 537                break;
 538        case INAT_IMM_PTR:
 539                if (!__get_immptr(insn))
 540                        goto err_out;
 541                break;
 542        case INAT_IMM_VWORD32:
 543                if (!__get_immv32(insn))
 544                        goto err_out;
 545                break;
 546        case INAT_IMM_VWORD:
 547                if (!__get_immv(insn))
 548                        goto err_out;
 549                break;
 550        default:
 551                /* Here, insn must have an immediate, but failed */
 552                goto err_out;
 553        }
 554        if (inat_has_second_immediate(insn->attr)) {
 555                insn->immediate2.value = get_next(char, insn);
 556                insn->immediate2.nbytes = 1;
 557        }
 558done:
 559        insn->immediate.got = 1;
 560
 561err_out:
 562        return;
 563}
 564
 565/**
 566 * insn_get_length() - Get the length of instruction
 567 * @insn:       &struct insn containing instruction
 568 *
 569 * If necessary, first collects the instruction up to and including the
 570 * immediates bytes.
 571 */
 572void insn_get_length(struct insn *insn)
 573{
 574        if (insn->length)
 575                return;
 576        if (!insn->immediate.got)
 577                insn_get_immediate(insn);
 578        insn->length = (unsigned char)((unsigned long)insn->next_byte
 579                                     - (unsigned long)insn->kaddr);
 580}
 581