1
2
3
4
5
6
7#include "sched.h"
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/*
 * Bitmask of the membarrier commands supported by this kernel build,
 * reported to userspace by MEMBARRIER_CMD_QUERY.
 */
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK		\
	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE		\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
#else
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	0
#endif

#ifdef CONFIG_RSEQ
/*
 * Fix: the register command is MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ;
 * the previous definition ORed in an undefined *_RSEQ_BITMASK name.
 */
#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK		\
	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK	0
#endif

/*
 * Fix: include the RSEQ commands in the query bitmask — the syscall
 * dispatch below implements them, but QUERY did not advertise them.
 */
#define MEMBARRIER_CMD_BITMASK						\
	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED	\
	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED			\
	| MEMBARRIER_CMD_PRIVATE_EXPEDITED				\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED			\
	| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK		\
	| MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK)
163
/* Default IPI handler: issue a full memory barrier on the interrupted CPU. */
static void ipi_mb(void *info)
{
	smp_mb();	/* IPIs should be serializing but not guaranteed by archs */
}
168
/*
 * IPI handler for MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE: issue a full
 * barrier, then serialize the core before the interrupted task returns to
 * usermode.
 */
static void ipi_sync_core(void *info)
{
	/*
	 * The barrier comes before sync_core_before_usermode() so that
	 * stores from the membarrier() caller are visible on this CPU
	 * before the core-serializing event — presumably matching the
	 * caller-side smp_mb() pairing used elsewhere in this file
	 * (NOTE(review): confirm against the original upstream comment).
	 */
	smp_mb();	/* IPIs should be serializing but not guaranteed by archs */

	sync_core_before_usermode();
}
185
/*
 * IPI handler for MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: fence rseq
 * critical sections on the interrupted CPU.
 */
static void ipi_rseq(void *info)
{
	/*
	 * Ensure that all stores done by the calling thread are visible
	 * to the current task before the current task resumes; the
	 * rseq_preempt() below then forces an rseq critical-section
	 * restart for the interrupted task.
	 */
	smp_mb();
	rseq_preempt(current);
}
198
/*
 * IPI handler used by sync_runqueues_membarrier_state(): copy @info's
 * (an mm_struct) membarrier state into this CPU's runqueue cache, but only
 * if the interrupted CPU is actually running a thread of that mm.
 */
static void ipi_sync_rq_state(void *info)
{
	struct mm_struct *mm = (struct mm_struct *) info;

	if (current->mm != mm)
		return;
	this_cpu_write(runqueues.membarrier_state,
		       atomic_read(&mm->membarrier_state));
	/*
	 * Order the store to runqueues.membarrier_state before any
	 * memory accesses performed after this IPI on this CPU, so no
	 * access following registration is reordered before it.
	 */
	smp_mb();
}
215
/*
 * Reset membarrier registration state at exec time: exec replaces the
 * address space, so prior registrations no longer apply to @mm.
 */
void membarrier_exec_mmap(struct mm_struct *mm)
{
	/*
	 * Issue a barrier before clearing membarrier_state so that no
	 * memory access prior to exec is reordered after the clear.
	 */
	smp_mb();
	atomic_set(&mm->membarrier_state, 0);
	/*
	 * Keep this CPU's runqueue cache in sync with the freshly
	 * cleared mm state.
	 */
	this_cpu_write(runqueues.membarrier_state, 0);
}
231
232void membarrier_update_current_mm(struct mm_struct *next_mm)
233{
234 struct rq *rq = this_rq();
235 int membarrier_state = 0;
236
237 if (next_mm)
238 membarrier_state = atomic_read(&next_mm->membarrier_state);
239 if (READ_ONCE(rq->membarrier_state) == membarrier_state)
240 return;
241 WRITE_ONCE(rq->membarrier_state, membarrier_state);
242}
243
/*
 * MEMBARRIER_CMD_GLOBAL_EXPEDITED: IPI every online CPU that is currently
 * running a userspace thread of a globally-registered process, so that each
 * such thread observes a full memory barrier. Returns 0 or -ENOMEM.
 */
static int membarrier_global_expedited(void)
{
	int cpu;
	cpumask_var_t tmpmask;

	if (num_online_cpus() == 1)
		return 0;

	/*
	 * Barrier on the caller thread _before_ sending the IPIs —
	 * presumably pairing with barriers around rq->curr updates in
	 * the scheduler (NOTE(review): confirm against scheduler docs).
	 */
	smp_mb();	/* system call entry is not a mb. */

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	cpus_read_lock();
	rcu_read_lock();
	for_each_online_cpu(cpu) {
		struct task_struct *p;

		/*
		 * Skipping the current CPU is safe even though we may be
		 * migrated at any point afterwards: at the point where
		 * raw_smp_processor_id() is read, the current CPU is in
		 * program order with respect to the caller thread.
		 */
		if (cpu == raw_smp_processor_id())
			continue;

		/* Only target CPUs whose runqueue cache shows a registered mm. */
		if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
		    MEMBARRIER_STATE_GLOBAL_EXPEDITED))
			continue;

		/*
		 * Skip the CPU if it currently runs a kernel thread,
		 * i.e. a task without a user mm.
		 */
		p = rcu_dereference(cpu_rq(cpu)->curr);
		if (!p->mm)
			continue;

		__cpumask_set_cpu(cpu, tmpmask);
	}
	rcu_read_unlock();

	/* Disable preemption so the IPI wait cannot migrate mid-call. */
	preempt_disable();
	smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
	preempt_enable();

	free_cpumask_var(tmpmask);
	cpus_read_unlock();

	/*
	 * Barrier on the caller thread _after_ the last IPI has been
	 * waited for, completing the pairing established above.
	 */
	smp_mb();	/* exit from system call is not a mb */
	return 0;
}
308
/*
 * PRIVATE_EXPEDITED family: interrupt CPUs running threads of the current
 * process with the handler selected by @flags (plain barrier, core sync,
 * or rseq fence). @cpu_id >= 0 targets a single CPU (MEMBARRIER_CMD_FLAG_CPU);
 * -1 targets all CPUs running this mm. Returns 0, -EINVAL, -EPERM or -ENOMEM.
 */
static int membarrier_private_expedited(int flags, int cpu_id)
{
	cpumask_var_t tmpmask;
	struct mm_struct *mm = current->mm;
	smp_call_func_t ipi_func = ipi_mb;

	/* Each variant requires its prior registration (the *_READY bit). */
	if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
			return -EINVAL;
		if (!(atomic_read(&mm->membarrier_state) &
		    MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
			return -EPERM;
		ipi_func = ipi_sync_core;
	} else if (flags == MEMBARRIER_FLAG_RSEQ) {
		if (!IS_ENABLED(CONFIG_RSEQ))
			return -EINVAL;
		if (!(atomic_read(&mm->membarrier_state) &
		    MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
			return -EPERM;
		ipi_func = ipi_rseq;
	} else {
		WARN_ON_ONCE(flags);
		if (!(atomic_read(&mm->membarrier_state) &
		    MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
			return -EPERM;
	}

	/*
	 * Fast path for single-threaded mm or UP — but note SYNC_CORE is
	 * deliberately excluded: it must still serialize the current CPU.
	 */
	if (flags != MEMBARRIER_FLAG_SYNC_CORE &&
	    (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1))
		return 0;

	/*
	 * Barrier on the caller thread _before_ sending IPIs, pairing
	 * with the closing smp_mb() below.
	 */
	smp_mb();	/* system call entry is not a mb. */

	if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	cpus_read_lock();

	if (cpu_id >= 0) {
		struct task_struct *p;

		/* Single-CPU mode: nothing to do unless that CPU runs our mm. */
		if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
			goto out;
		rcu_read_lock();
		p = rcu_dereference(cpu_rq(cpu_id)->curr);
		if (!p || p->mm != mm) {
			rcu_read_unlock();
			goto out;
		}
		rcu_read_unlock();
	} else {
		int cpu;

		/* Collect every online CPU currently running this mm. */
		rcu_read_lock();
		for_each_online_cpu(cpu) {
			struct task_struct *p;

			p = rcu_dereference(cpu_rq(cpu)->curr);
			if (p && p->mm == mm)
				__cpumask_set_cpu(cpu, tmpmask);
		}
		rcu_read_unlock();
	}

	if (cpu_id >= 0) {
		/*
		 * smp_call_function_single() also invokes ipi_func() when
		 * cpu_id happens to be the calling CPU.
		 */
		smp_call_function_single(cpu_id, ipi_func, NULL, 1);
	} else {
		/*
		 * For the plain barrier we can skip the current CPU: the
		 * smp_mb() below covers it, and a migration executes a
		 * full barrier in the scheduler anyway.
		 *
		 * For SYNC_CORE the current CPU must be included
		 * (on_each_cpu_mask runs the handler locally too) —
		 * otherwise a thread of this mm could be scheduled here
		 * around the call without a core sync.
		 *
		 * NOTE(review): the RSEQ variant takes the skip-self path;
		 * presumably user code must not invoke syscalls from
		 * inside an rseq critical section — confirm.
		 */
		if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
			preempt_disable();
			smp_call_function_many(tmpmask, ipi_func, NULL, true);
			preempt_enable();
		} else {
			on_each_cpu_mask(tmpmask, ipi_func, NULL, true);
		}
	}

out:
	if (cpu_id < 0)
		free_cpumask_var(tmpmask);
	cpus_read_unlock();

	/*
	 * Barrier on the caller thread _after_ we finished waiting for
	 * the last IPI, pairing with the opening smp_mb() above.
	 */
	smp_mb();	/* exit from system call is not a mb */

	return 0;
}
424
/*
 * Propagate @mm's membarrier_state into the runqueue cache of every CPU
 * currently running a thread of @mm. Returns 0 or -ENOMEM.
 */
static int sync_runqueues_membarrier_state(struct mm_struct *mm)
{
	int membarrier_state = atomic_read(&mm->membarrier_state);
	cpumask_var_t tmpmask;
	int cpu;

	if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
		this_cpu_write(runqueues.membarrier_state, membarrier_state);

		/*
		 * Single-user (or UP) fast path: updating the local
		 * runqueue cache suffices; the barrier guarantees no
		 * memory access following registration is reordered
		 * before it.
		 */
		smp_mb();
		return 0;
	}

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Wait for a grace period so that all future scheduler
	 * executions will observe @mm's updated membarrier state
	 * before we sample rq->curr below.
	 */
	synchronize_rcu();

	/*
	 * For each runqueue whose current task uses @mm, IPI that CPU so
	 * it copies @mm's membarrier state into its runqueue cache
	 * (ipi_sync_rq_state re-checks current->mm on arrival).
	 */
	cpus_read_lock();
	rcu_read_lock();
	for_each_online_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
		struct task_struct *p;

		p = rcu_dereference(rq->curr);
		if (p && p->mm == mm)
			__cpumask_set_cpu(cpu, tmpmask);
	}
	rcu_read_unlock();

	on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);

	free_cpumask_var(tmpmask);
	cpus_read_unlock();

	return 0;
}
481
482static int membarrier_register_global_expedited(void)
483{
484 struct task_struct *p = current;
485 struct mm_struct *mm = p->mm;
486 int ret;
487
488 if (atomic_read(&mm->membarrier_state) &
489 MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
490 return 0;
491 atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
492 ret = sync_runqueues_membarrier_state(mm);
493 if (ret)
494 return ret;
495 atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
496 &mm->membarrier_state);
497
498 return 0;
499}
500
501static int membarrier_register_private_expedited(int flags)
502{
503 struct task_struct *p = current;
504 struct mm_struct *mm = p->mm;
505 int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
506 set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
507 ret;
508
509 if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
510 if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
511 return -EINVAL;
512 ready_state =
513 MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
514 } else if (flags == MEMBARRIER_FLAG_RSEQ) {
515 if (!IS_ENABLED(CONFIG_RSEQ))
516 return -EINVAL;
517 ready_state =
518 MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
519 } else {
520 WARN_ON_ONCE(flags);
521 }
522
523
524
525
526
527
528 if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
529 return 0;
530 if (flags & MEMBARRIER_FLAG_SYNC_CORE)
531 set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
532 if (flags & MEMBARRIER_FLAG_RSEQ)
533 set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
534 atomic_or(set_state, &mm->membarrier_state);
535 ret = sync_runqueues_membarrier_state(mm);
536 if (ret)
537 return ret;
538 atomic_or(ready_state, &mm->membarrier_state);
539
540 return 0;
541}
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
/**
 * sys_membarrier - issue memory barriers on a set of threads
 * @cmd:    one of the MEMBARRIER_CMD_* commands (uapi membarrier.h).
 * @flags:  0, or MEMBARRIER_CMD_FLAG_CPU (only valid with
 *          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ) to target a single CPU.
 * @cpu_id: target CPU when MEMBARRIER_CMD_FLAG_CPU is set; forced to -1
 *          (all CPUs) otherwise.
 *
 * Returns the supported-command bitmask for MEMBARRIER_CMD_QUERY,
 * 0 on success, or a negative errno on failure.
 */
SYSCALL_DEFINE3(membarrier, int, cmd, unsigned int, flags, int, cpu_id)
{
	/* Validate flags: only the RSEQ command accepts a (single) flag. */
	switch (cmd) {
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
		if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
			return -EINVAL;
		break;
	default:
		if (unlikely(flags))
			return -EINVAL;
	}

	if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
		cpu_id = -1;

	switch (cmd) {
	case MEMBARRIER_CMD_QUERY:
	{
		int cmd_mask = MEMBARRIER_CMD_BITMASK;

		/* CMD_GLOBAL is unavailable under nohz_full (see below). */
		if (tick_nohz_full_enabled())
			cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
		return cmd_mask;
	}
	case MEMBARRIER_CMD_GLOBAL:
		/* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
		if (tick_nohz_full_enabled())
			return -EINVAL;
		if (num_online_cpus() > 1)
			synchronize_rcu();
		return 0;
	case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
		return membarrier_global_expedited();
	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
		return membarrier_register_global_expedited();
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
		return membarrier_private_expedited(0, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
		return membarrier_register_private_expedited(0);
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
		return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
		return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
		return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
		return membarrier_register_private_expedited(MEMBARRIER_FLAG_RSEQ);
	default:
		return -EINVAL;
	}
}
630