qemu/linux-user/mmap.c
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

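/*
 * The mmap lock is recursive per thread: a thread-local nesting count
 * lets the same thread call mmap_lock() again while already holding it,
 * and only the outermost lock/unlock pair touches the underlying
 * pthread mutex.
 */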
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming the guest bit to a host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

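    /*
     * A host page can be larger than a target page and thus shared by
     * several guest mappings with different protections.  Split the
     * request into at most three host ranges: head and/or tail host
     * pages whose protection must be the union of the new flags and the
     * flags of the other guest pages they contain, plus a middle range
     * that gets exactly the requested protection.
     */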
    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

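    /*
     * The fragment covers only part of its host page.  If no other
     * guest pages live on that host page, simply allocate a fresh
     * anonymous host page; otherwise temporarily make the page
     * writable, fill the fragment by reading from the file (or zeroing
     * it for anonymous mappings), then restore the combined protection
     * of old and new guest pages.
     */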
    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}

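/*
 * Guest address-space layout state.  task_unmapped_base and
 * elf_et_dyn_base are presumably set up during ELF load (outside this
 * file); mmap_next_start is the search hint advanced by mmap_find_vma()
 * below.
 */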
abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

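    /*
     * Probe the host with PROT_NONE mappings until one lands at an
     * address that is both representable in the guest and aligned as
     * requested.  The 'repeat' counter records how many times in a row
     * the kernel returned the same address; it selects the fallback
     * strategy in the switch below.
     */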
    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page-size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned with
             * the host's real page size. Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

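        /*
         * Non-fixed case: first reserve the whole host range with an
         * anonymous mapping, then (for file mappings) overlay the file
         * at the same address with MAP_FIXED.  Anything past the file
         * mapping remains zeroed anonymous memory, which matters when
         * the target page size is larger than the host page size.
         */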
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address. It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, host mmap() handles this
         * error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

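        /*
         * The mapping is now built in up to three pieces: a partial
         * host page at the start and another at the end (each handled
         * by mmap_frag()), plus a directly mmap()ed middle.
         */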
        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
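    /*
     * Pages that are backed 1:1 by the host mapping are marked
     * PAGE_PASSTHROUGH, so that operations such as madvise() can later
     * be forwarded to the host (see target_madvise() below).
     */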
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

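    /*
     * With reserved_va the guest address space must stay reserved in
     * the host, so replace the range with a PROT_NONE anonymous mapping
     * instead of unmapping it; otherwise a plain munmap() is enough.
     */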
    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
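        /*
         * Resize in place.  With reserved_va and a growing request,
         * first check at the guest level that no guest pages are
         * already mapped in the extension range; if any are, fail with
         * ENOMEM rather than relying on the host mremap() outcome.
         */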
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}