linux/tools/arch/x86/lib/insn.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * x86 instruction analysis
   4 *
   5 * Copyright (C) IBM Corporation, 2002, 2004, 2009
   6 */
   7
   8#ifdef __KERNEL__
   9#include <linux/string.h>
  10#else
  11#include <string.h>
  12#endif
  13#include "../include/asm/inat.h"
  14#include "../include/asm/insn.h"
  15
  16#include "../include/asm/emulate_prefix.h"
  17
  18/* Verify next sizeof(t) bytes can be on the same instruction */
  19#define validate_next(t, insn, n)       \
  20        ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
  21
  22#define __get_next(t, insn)     \
  23        ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
  24
  25#define __peek_nbyte_next(t, insn, n)   \
  26        ({ t r = *(t*)((insn)->next_byte + n); r; })
  27
  28#define get_next(t, insn)       \
  29        ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
  30
  31#define peek_nbyte_next(t, insn, n)     \
  32        ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
  33
  34#define peek_next(t, insn)      peek_nbyte_next(t, insn, 0)
  35
  36/**
  37 * insn_init() - initialize struct insn
  38 * @insn:       &struct insn to be initialized
  39 * @kaddr:      address (in kernel memory) of instruction (or copy thereof)
  40 * @x86_64:     !0 for 64-bit kernel or 64-bit app
  41 */
  42void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
  43{
  44        /*
  45         * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
  46         * even if the input buffer is long enough to hold them.
  47         */
  48        if (buf_len > MAX_INSN_SIZE)
  49                buf_len = MAX_INSN_SIZE;
  50
  51        memset(insn, 0, sizeof(*insn));
  52        insn->kaddr = kaddr;
  53        insn->end_kaddr = kaddr + buf_len;
  54        insn->next_byte = kaddr;
  55        insn->x86_64 = x86_64 ? 1 : 0;
  56        insn->opnd_bytes = 4;
  57        if (x86_64)
  58                insn->addr_bytes = 8;
  59        else
  60                insn->addr_bytes = 4;
  61}
  62
  63static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
  64static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
  65
  66static int __insn_get_emulate_prefix(struct insn *insn,
  67                                     const insn_byte_t *prefix, size_t len)
  68{
  69        size_t i;
  70
  71        for (i = 0; i < len; i++) {
  72                if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
  73                        goto err_out;
  74        }
  75
  76        insn->emulate_prefix_size = len;
  77        insn->next_byte += len;
  78
  79        return 1;
  80
  81err_out:
  82        return 0;
  83}
  84
  85static void insn_get_emulate_prefix(struct insn *insn)
  86{
  87        if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
  88                return;
  89
  90        __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
  91}
  92
  93/**
  94 * insn_get_prefixes - scan x86 instruction prefix bytes
  95 * @insn:       &struct insn containing instruction
  96 *
  97 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
  98 * to point to the (first) opcode.  No effect if @insn->prefixes.got
  99 * is already set.
 100 */
 101void insn_get_prefixes(struct insn *insn)
 102{
 103        struct insn_field *prefixes = &insn->prefixes;
 104        insn_attr_t attr;
 105        insn_byte_t b, lb;
 106        int i, nb;
 107
 108        if (prefixes->got)
 109                return;
 110
 111        insn_get_emulate_prefix(insn);
 112
 113        nb = 0;
 114        lb = 0;
 115        b = peek_next(insn_byte_t, insn);
 116        attr = inat_get_opcode_attribute(b);
 117        while (inat_is_legacy_prefix(attr)) {
 118                /* Skip if same prefix */
 119                for (i = 0; i < nb; i++)
 120                        if (prefixes->bytes[i] == b)
 121                                goto found;
 122                if (nb == 4)
 123                        /* Invalid instruction */
 124                        break;
 125                prefixes->bytes[nb++] = b;
 126                if (inat_is_address_size_prefix(attr)) {
 127                        /* address size switches 2/4 or 4/8 */
 128                        if (insn->x86_64)
 129                                insn->addr_bytes ^= 12;
 130                        else
 131                                insn->addr_bytes ^= 6;
 132                } else if (inat_is_operand_size_prefix(attr)) {
 133                        /* oprand size switches 2/4 */
 134                        insn->opnd_bytes ^= 6;
 135                }
 136found:
 137                prefixes->nbytes++;
 138                insn->next_byte++;
 139                lb = b;
 140                b = peek_next(insn_byte_t, insn);
 141                attr = inat_get_opcode_attribute(b);
 142        }
 143        /* Set the last prefix */
 144        if (lb && lb != insn->prefixes.bytes[3]) {
 145                if (unlikely(insn->prefixes.bytes[3])) {
 146                        /* Swap the last prefix */
 147                        b = insn->prefixes.bytes[3];
 148                        for (i = 0; i < nb; i++)
 149                                if (prefixes->bytes[i] == lb)
 150                                        prefixes->bytes[i] = b;
 151                }
 152                insn->prefixes.bytes[3] = lb;
 153        }
 154
 155        /* Decode REX prefix */
 156        if (insn->x86_64) {
 157                b = peek_next(insn_byte_t, insn);
 158                attr = inat_get_opcode_attribute(b);
 159                if (inat_is_rex_prefix(attr)) {
 160                        insn->rex_prefix.value = b;
 161                        insn->rex_prefix.nbytes = 1;
 162                        insn->next_byte++;
 163                        if (X86_REX_W(b))
 164                                /* REX.W overrides opnd_size */
 165                                insn->opnd_bytes = 8;
 166                }
 167        }
 168        insn->rex_prefix.got = 1;
 169
 170        /* Decode VEX prefix */
 171        b = peek_next(insn_byte_t, insn);
 172        attr = inat_get_opcode_attribute(b);
 173        if (inat_is_vex_prefix(attr)) {
 174                insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
 175                if (!insn->x86_64) {
 176                        /*
 177                         * In 32-bits mode, if the [7:6] bits (mod bits of
 178                         * ModRM) on the second byte are not 11b, it is
 179                         * LDS or LES or BOUND.
 180                         */
 181                        if (X86_MODRM_MOD(b2) != 3)
 182                                goto vex_end;
 183                }
 184                insn->vex_prefix.bytes[0] = b;
 185                insn->vex_prefix.bytes[1] = b2;
 186                if (inat_is_evex_prefix(attr)) {
 187                        b2 = peek_nbyte_next(insn_byte_t, insn, 2);
 188                        insn->vex_prefix.bytes[2] = b2;
 189                        b2 = peek_nbyte_next(insn_byte_t, insn, 3);
 190                        insn->vex_prefix.bytes[3] = b2;
 191                        insn->vex_prefix.nbytes = 4;
 192                        insn->next_byte += 4;
 193                        if (insn->x86_64 && X86_VEX_W(b2))
 194                                /* VEX.W overrides opnd_size */
 195                                insn->opnd_bytes = 8;
 196                } else if (inat_is_vex3_prefix(attr)) {
 197                        b2 = peek_nbyte_next(insn_byte_t, insn, 2);
 198                        insn->vex_prefix.bytes[2] = b2;
 199                        insn->vex_prefix.nbytes = 3;
 200                        insn->next_byte += 3;
 201                        if (insn->x86_64 && X86_VEX_W(b2))
 202                                /* VEX.W overrides opnd_size */
 203                                insn->opnd_bytes = 8;
 204                } else {
 205                        /*
 206                         * For VEX2, fake VEX3-like byte#2.
 207                         * Makes it easier to decode vex.W, vex.vvvv,
 208                         * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
 209                         */
 210                        insn->vex_prefix.bytes[2] = b2 & 0x7f;
 211                        insn->vex_prefix.nbytes = 2;
 212                        insn->next_byte += 2;
 213                }
 214        }
 215vex_end:
 216        insn->vex_prefix.got = 1;
 217
 218        prefixes->got = 1;
 219
 220err_out:
 221        return;
 222}
 223
 224/**
 225 * insn_get_opcode - collect opcode(s)
 226 * @insn:       &struct insn containing instruction
 227 *
 228 * Populates @insn->opcode, updates @insn->next_byte to point past the
 229 * opcode byte(s), and set @insn->attr (except for groups).
 230 * If necessary, first collects any preceding (prefix) bytes.
 231 * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
 232 * is already 1.
 233 */
 234void insn_get_opcode(struct insn *insn)
 235{
 236        struct insn_field *opcode = &insn->opcode;
 237        insn_byte_t op;
 238        int pfx_id;
 239        if (opcode->got)
 240                return;
 241        if (!insn->prefixes.got)
 242                insn_get_prefixes(insn);
 243
 244        /* Get first opcode */
 245        op = get_next(insn_byte_t, insn);
 246        opcode->bytes[0] = op;
 247        opcode->nbytes = 1;
 248
 249        /* Check if there is VEX prefix or not */
 250        if (insn_is_avx(insn)) {
 251                insn_byte_t m, p;
 252                m = insn_vex_m_bits(insn);
 253                p = insn_vex_p_bits(insn);
 254                insn->attr = inat_get_avx_attribute(op, m, p);
 255                if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
 256                    (!inat_accept_vex(insn->attr) &&
 257                     !inat_is_group(insn->attr)))
 258                        insn->attr = 0; /* This instruction is bad */
 259                goto end;       /* VEX has only 1 byte for opcode */
 260        }
 261
 262        insn->attr = inat_get_opcode_attribute(op);
 263        while (inat_is_escape(insn->attr)) {
 264                /* Get escaped opcode */
 265                op = get_next(insn_byte_t, insn);
 266                opcode->bytes[opcode->nbytes++] = op;
 267                pfx_id = insn_last_prefix_id(insn);
 268                insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
 269        }
 270        if (inat_must_vex(insn->attr))
 271                insn->attr = 0; /* This instruction is bad */
 272end:
 273        opcode->got = 1;
 274
 275err_out:
 276        return;
 277}
 278
 279/**
 280 * insn_get_modrm - collect ModRM byte, if any
 281 * @insn:       &struct insn containing instruction
 282 *
 283 * Populates @insn->modrm and updates @insn->next_byte to point past the
 284 * ModRM byte, if any.  If necessary, first collects the preceding bytes
 285 * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
 286 */
 287void insn_get_modrm(struct insn *insn)
 288{
 289        struct insn_field *modrm = &insn->modrm;
 290        insn_byte_t pfx_id, mod;
 291        if (modrm->got)
 292                return;
 293        if (!insn->opcode.got)
 294                insn_get_opcode(insn);
 295
 296        if (inat_has_modrm(insn->attr)) {
 297                mod = get_next(insn_byte_t, insn);
 298                modrm->value = mod;
 299                modrm->nbytes = 1;
 300                if (inat_is_group(insn->attr)) {
 301                        pfx_id = insn_last_prefix_id(insn);
 302                        insn->attr = inat_get_group_attribute(mod, pfx_id,
 303                                                              insn->attr);
 304                        if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
 305                                insn->attr = 0; /* This is bad */
 306                }
 307        }
 308
 309        if (insn->x86_64 && inat_is_force64(insn->attr))
 310                insn->opnd_bytes = 8;
 311        modrm->got = 1;
 312
 313err_out:
 314        return;
 315}
 316
 317
 318/**
 319 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
 320 * @insn:       &struct insn containing instruction
 321 *
 322 * If necessary, first collects the instruction up to and including the
 323 * ModRM byte.  No effect if @insn->x86_64 is 0.
 324 */
 325int insn_rip_relative(struct insn *insn)
 326{
 327        struct insn_field *modrm = &insn->modrm;
 328
 329        if (!insn->x86_64)
 330                return 0;
 331        if (!modrm->got)
 332                insn_get_modrm(insn);
 333        /*
 334         * For rip-relative instructions, the mod field (top 2 bits)
 335         * is zero and the r/m field (bottom 3 bits) is 0x5.
 336         */
 337        return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
 338}
 339
 340/**
 341 * insn_get_sib() - Get the SIB byte of instruction
 342 * @insn:       &struct insn containing instruction
 343 *
 344 * If necessary, first collects the instruction up to and including the
 345 * ModRM byte.
 346 */
 347void insn_get_sib(struct insn *insn)
 348{
 349        insn_byte_t modrm;
 350
 351        if (insn->sib.got)
 352                return;
 353        if (!insn->modrm.got)
 354                insn_get_modrm(insn);
 355        if (insn->modrm.nbytes) {
 356                modrm = (insn_byte_t)insn->modrm.value;
 357                if (insn->addr_bytes != 2 &&
 358                    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
 359                        insn->sib.value = get_next(insn_byte_t, insn);
 360                        insn->sib.nbytes = 1;
 361                }
 362        }
 363        insn->sib.got = 1;
 364
 365err_out:
 366        return;
 367}
 368
 369
 370/**
 371 * insn_get_displacement() - Get the displacement of instruction
 372 * @insn:       &struct insn containing instruction
 373 *
 374 * If necessary, first collects the instruction up to and including the
 375 * SIB byte.
 376 * Displacement value is sign-expanded.
 377 */
 378void insn_get_displacement(struct insn *insn)
 379{
 380        insn_byte_t mod, rm, base;
 381
 382        if (insn->displacement.got)
 383                return;
 384        if (!insn->sib.got)
 385                insn_get_sib(insn);
 386        if (insn->modrm.nbytes) {
 387                /*
 388                 * Interpreting the modrm byte:
 389                 * mod = 00 - no displacement fields (exceptions below)
 390                 * mod = 01 - 1-byte displacement field
 391                 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
 392                 *      address size = 2 (0x67 prefix in 32-bit mode)
 393                 * mod = 11 - no memory operand
 394                 *
 395                 * If address size = 2...
 396                 * mod = 00, r/m = 110 - displacement field is 2 bytes
 397                 *
 398                 * If address size != 2...
 399                 * mod != 11, r/m = 100 - SIB byte exists
 400                 * mod = 00, SIB base = 101 - displacement field is 4 bytes
 401                 * mod = 00, r/m = 101 - rip-relative addressing, displacement
 402                 *      field is 4 bytes
 403                 */
 404                mod = X86_MODRM_MOD(insn->modrm.value);
 405                rm = X86_MODRM_RM(insn->modrm.value);
 406                base = X86_SIB_BASE(insn->sib.value);
 407                if (mod == 3)
 408                        goto out;
 409                if (mod == 1) {
 410                        insn->displacement.value = get_next(signed char, insn);
 411                        insn->displacement.nbytes = 1;
 412                } else if (insn->addr_bytes == 2) {
 413                        if ((mod == 0 && rm == 6) || mod == 2) {
 414                                insn->displacement.value =
 415                                         get_next(short, insn);
 416                                insn->displacement.nbytes = 2;
 417                        }
 418                } else {
 419                        if ((mod == 0 && rm == 5) || mod == 2 ||
 420                            (mod == 0 && base == 5)) {
 421                                insn->displacement.value = get_next(int, insn);
 422                                insn->displacement.nbytes = 4;
 423                        }
 424                }
 425        }
 426out:
 427        insn->displacement.got = 1;
 428
 429err_out:
 430        return;
 431}
 432
 433/* Decode moffset16/32/64. Return 0 if failed */
 434static int __get_moffset(struct insn *insn)
 435{
 436        switch (insn->addr_bytes) {
 437        case 2:
 438                insn->moffset1.value = get_next(short, insn);
 439                insn->moffset1.nbytes = 2;
 440                break;
 441        case 4:
 442                insn->moffset1.value = get_next(int, insn);
 443                insn->moffset1.nbytes = 4;
 444                break;
 445        case 8:
 446                insn->moffset1.value = get_next(int, insn);
 447                insn->moffset1.nbytes = 4;
 448                insn->moffset2.value = get_next(int, insn);
 449                insn->moffset2.nbytes = 4;
 450                break;
 451        default:        /* opnd_bytes must be modified manually */
 452                goto err_out;
 453        }
 454        insn->moffset1.got = insn->moffset2.got = 1;
 455
 456        return 1;
 457
 458err_out:
 459        return 0;
 460}
 461
 462/* Decode imm v32(Iz). Return 0 if failed */
 463static int __get_immv32(struct insn *insn)
 464{
 465        switch (insn->opnd_bytes) {
 466        case 2:
 467                insn->immediate.value = get_next(short, insn);
 468                insn->immediate.nbytes = 2;
 469                break;
 470        case 4:
 471        case 8:
 472                insn->immediate.value = get_next(int, insn);
 473                insn->immediate.nbytes = 4;
 474                break;
 475        default:        /* opnd_bytes must be modified manually */
 476                goto err_out;
 477        }
 478
 479        return 1;
 480
 481err_out:
 482        return 0;
 483}
 484
 485/* Decode imm v64(Iv/Ov), Return 0 if failed */
 486static int __get_immv(struct insn *insn)
 487{
 488        switch (insn->opnd_bytes) {
 489        case 2:
 490                insn->immediate1.value = get_next(short, insn);
 491                insn->immediate1.nbytes = 2;
 492                break;
 493        case 4:
 494                insn->immediate1.value = get_next(int, insn);
 495                insn->immediate1.nbytes = 4;
 496                break;
 497        case 8:
 498                insn->immediate1.value = get_next(int, insn);
 499                insn->immediate1.nbytes = 4;
 500                insn->immediate2.value = get_next(int, insn);
 501                insn->immediate2.nbytes = 4;
 502                break;
 503        default:        /* opnd_bytes must be modified manually */
 504                goto err_out;
 505        }
 506        insn->immediate1.got = insn->immediate2.got = 1;
 507
 508        return 1;
 509err_out:
 510        return 0;
 511}
 512
 513/* Decode ptr16:16/32(Ap) */
 514static int __get_immptr(struct insn *insn)
 515{
 516        switch (insn->opnd_bytes) {
 517        case 2:
 518                insn->immediate1.value = get_next(short, insn);
 519                insn->immediate1.nbytes = 2;
 520                break;
 521        case 4:
 522                insn->immediate1.value = get_next(int, insn);
 523                insn->immediate1.nbytes = 4;
 524                break;
 525        case 8:
 526                /* ptr16:64 is not exist (no segment) */
 527                return 0;
 528        default:        /* opnd_bytes must be modified manually */
 529                goto err_out;
 530        }
 531        insn->immediate2.value = get_next(unsigned short, insn);
 532        insn->immediate2.nbytes = 2;
 533        insn->immediate1.got = insn->immediate2.got = 1;
 534
 535        return 1;
 536err_out:
 537        return 0;
 538}
 539
 540/**
 541 * insn_get_immediate() - Get the immediates of instruction
 542 * @insn:       &struct insn containing instruction
 543 *
 544 * If necessary, first collects the instruction up to and including the
 545 * displacement bytes.
 546 * Basically, most of immediates are sign-expanded. Unsigned-value can be
 547 * get by bit masking with ((1 << (nbytes * 8)) - 1)
 548 */
 549void insn_get_immediate(struct insn *insn)
 550{
 551        if (insn->immediate.got)
 552                return;
 553        if (!insn->displacement.got)
 554                insn_get_displacement(insn);
 555
 556        if (inat_has_moffset(insn->attr)) {
 557                if (!__get_moffset(insn))
 558                        goto err_out;
 559                goto done;
 560        }
 561
 562        if (!inat_has_immediate(insn->attr))
 563                /* no immediates */
 564                goto done;
 565
 566        switch (inat_immediate_size(insn->attr)) {
 567        case INAT_IMM_BYTE:
 568                insn->immediate.value = get_next(signed char, insn);
 569                insn->immediate.nbytes = 1;
 570                break;
 571        case INAT_IMM_WORD:
 572                insn->immediate.value = get_next(short, insn);
 573                insn->immediate.nbytes = 2;
 574                break;
 575        case INAT_IMM_DWORD:
 576                insn->immediate.value = get_next(int, insn);
 577                insn->immediate.nbytes = 4;
 578                break;
 579        case INAT_IMM_QWORD:
 580                insn->immediate1.value = get_next(int, insn);
 581                insn->immediate1.nbytes = 4;
 582                insn->immediate2.value = get_next(int, insn);
 583                insn->immediate2.nbytes = 4;
 584                break;
 585        case INAT_IMM_PTR:
 586                if (!__get_immptr(insn))
 587                        goto err_out;
 588                break;
 589        case INAT_IMM_VWORD32:
 590                if (!__get_immv32(insn))
 591                        goto err_out;
 592                break;
 593        case INAT_IMM_VWORD:
 594                if (!__get_immv(insn))
 595                        goto err_out;
 596                break;
 597        default:
 598                /* Here, insn must have an immediate, but failed */
 599                goto err_out;
 600        }
 601        if (inat_has_second_immediate(insn->attr)) {
 602                insn->immediate2.value = get_next(signed char, insn);
 603                insn->immediate2.nbytes = 1;
 604        }
 605done:
 606        insn->immediate.got = 1;
 607
 608err_out:
 609        return;
 610}
 611
 612/**
 613 * insn_get_length() - Get the length of instruction
 614 * @insn:       &struct insn containing instruction
 615 *
 616 * If necessary, first collects the instruction up to and including the
 617 * immediates bytes.
 618 */
 619void insn_get_length(struct insn *insn)
 620{
 621        if (insn->length)
 622                return;
 623        if (!insn->immediate.got)
 624                insn_get_immediate(insn);
 625        insn->length = (unsigned char)((unsigned long)insn->next_byte
 626                                     - (unsigned long)insn->kaddr);
 627}
 628