qemu/linux-user/mmap.c
/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

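/*
 * The lock is recursive per thread: mmap_lock_count is thread-local, so
 * nested lock/unlock pairs are cheap and only the outermost pair touches
 * the mutex.  An illustrative (not from the original source) call pattern:
 *
 *     mmap_lock();
 *     mmap_lock();      // nested: only increments mmap_lock_count
 *     ... critical section ...
 *     mmap_unlock();
 *     mmap_unlock();    // count drops to 0: mutex actually released
 */
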
/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

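/*
 * A sketch of the expected call order around fork() (inferred from the
 * contracts of the two helpers above, not copied from the caller):
 *
 *     mmap_fork_start();         // parent: take mmap_mutex; asserts no
 *                                // recursive lock is held by this thread
 *     pid = fork();
 *     mmap_fork_end(pid == 0);   // child reinitializes the mutex,
 *                                // parent simply releases it
 *
 * Reinitializing in the child is what keeps the mutex usable there: the
 * child inherits the locked mutex but not the thread that would unlock it.
 */
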
/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming the guest bit to a host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

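/*
 * Worked example (illustrative): a guest request of PROT_READ | PROT_EXEC
 * yields *host_prot == PROT_READ -- guest-executable pages are only ever
 * read by the host translator, never executed directly -- while the
 * returned page_flags carry PAGE_VALID plus the read/exec bits, so QEMU's
 * own page tracking still knows the page is executable.
 */
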
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len - 1, page_flags);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}

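/*
 * Shape of the work above, as a sketch (assuming 4K target pages inside
 * 64K host pages; the sizes are illustrative only):
 *
 *     host pages:  |<---- 64K ---->|<---- 64K ---->|<---- 64K ---->|
 *     request:            |<------- start .. end ------->|
 *
 * The partially covered head and tail host pages are mprotect()ed with
 * the OR of the requested protection and the flags of the neighbouring
 * target pages, so those neighbours keep working; only the fully covered
 * middle region gets exactly host_prot.
 */
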
/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /*
         * msync() won't work here, so we return an error if write is
         * possible while it is a shared mapping.
         */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

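/*
 * Example of when mmap_frag() runs (illustrative): with 4K target pages on
 * a 64K-host-page system, a guest mmap of [0x71000, 0x72000) only partially
 * covers the host page at 0x70000.  The fragment is emulated by mapping the
 * whole host page anonymously (if nothing was there) or by pread()ing the
 * file contents into it, so neighbouring target pages within the same host
 * page keep their own protections and contents.
 */
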
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
 * of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va + 1 - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va + 1 - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

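/*
 * Note on the loop above: addr is unsigned, so "addr > end_addr" after the
 * decrement can only become true when addr wraps below zero -- i.e. the scan
 * fell off the bottom of the address space and must restart from the top
 * (once) before giving up.
 */
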
/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If this repeats, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

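/*
 * Typical use (a sketch based on how target_mmap below consumes the
 * result): the returned region is already backed by a PROT_NONE
 * reservation, so the caller replaces it in place rather than unmapping:
 *
 *     addr = mmap_find_vma(0, size, TARGET_PAGE_SIZE);
 *     if (addr != (abi_ulong)-1) {
 *         p = mmap(g2h_untagged(addr), size, host_prot,
 *                  flags | MAP_FIXED, fd, offset);   // discards the hold
 *     }
 */
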
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF?  */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned to the host's
             * real page size.  Additional anonymous maps will be created
             * beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap()
         * handles this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len - 1, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end - 1,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len - 1, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

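/*
 * MAP_FIXED mappings above thus decompose into at most three pieces
 * (sketch, sizes illustrative): an unaligned head fragment handled by
 * mmap_frag(), a host-page-aligned middle mapped directly (and marked
 * PAGE_PASSTHROUGH, so e.g. madvise can be forwarded for it), and an
 * unaligned tail fragment, again via mmap_frag():
 *
 *     |frag|======= direct host mmap =======|frag|
 */
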
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

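/*
 * With reserved_va, "unmapping" must not return address space to the host,
 * or another host allocation could land inside the guest's reserved range.
 * mmap_reserve() above therefore re-maps the region as PROT_NONE anonymous
 * memory instead of calling munmap(), keeping the reservation intact.
 */
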
int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len - 1, 0);
    }
    mmap_unlock();
    return ret;
}

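/*
 * Note the asymmetry above: if a head or tail host page still contains
 * other live target pages (prot != 0), that host page is left mapped and
 * only the guest page flags are cleared for the unmapped range; the host
 * munmap()/mmap_reserve() covers just the fully dead host pages in between.
 */
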
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

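/*
 * The three mremap() strategies above, summarized (illustrative):
 *   - MREMAP_FIXED:   move to the exact guest address requested;
 *   - MREMAP_MAYMOVE: pick a new region with mmap_find_vma() and force
 *                     the move there with MREMAP_FIXED;
 *   - otherwise:      grow/shrink in place, but only if (under reserved_va)
 *                     the pages just past the old mapping are unused.
 */
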
static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() checks whether passthrough is possible by
     * verifying that every page in the range is known to have the same
     * semantics in the host and the guest; in that case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}