linux/arch/x86/hyperv/mmu.c
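/*
 * Hyper-V enlightened remote TLB flush: when the hypervisor recommends
 * it, remote TLB shootdowns are performed with the
 * HvFlushVirtualAddressSpace(,Ex)/HvFlushVirtualAddressList(,Ex)
 * hypercalls instead of IPIs.
 */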
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
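/* Assuming 4 KiB pages, one gva_list entry thus covers up to 16 MiB. */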
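/*
 * Per-CPU pointers to page-sized hypercall input buffers. The per-cpu
 * pointer arrays are allocated in hyper_alloc_mmu(); the page behind
 * each pointer is allocated lazily, on first use, with GFP_ATOMIC.
 */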
static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
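/*
 * For example (hypothetical range): flushing three pages starting at
 * 0x7f0000000000 produces a single entry 0x7f0000000000 | 2, i.e. the
 * base page plus two additional pages. Ranges longer than
 * HV_TLB_FLUSH_UNIT spill into further entries, with the low 12 bits of
 * a full entry set to 0xfff.
 */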
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT)
                        gva_list[gva_n] |= ~PAGE_MASK;
                else if (diff)
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

                cur += HV_TLB_FLUSH_UNIT;
                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}

/* Return the number of banks in the resulting vp_set */
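/*
 * Example (hypothetical): if 'cpus' maps to VP numbers 1 and 130, bit 1
 * of bank_contents[0] and bit 2 of bank_contents[2] get set, nr_bank
 * becomes 3 and valid_bank_mask 0x7; bank 1 stays valid but empty.
 */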
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /* valid_bank_mask can represent up to 64 banks */
        if (hv_max_vp_index / 64 >= 64)
                return 0;

        /*
         * Clear all banks up to the maximum possible bank: hv_flush_pcpu_ex
         * structs are not cleared between calls, so we would risk flushing
         * unneeded vCPUs otherwise.
         */
        for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
                flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

        /*
         * Some banks may end up being empty but this is acceptable.
         */
        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;
                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}
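/*
 * Flush TLBs on the CPUs in 'cpus' with a single flush hypercall. Falls
 * back to the IPI-based native_flush_tlb_others() when the hypercall
 * page or the per-cpu input buffer is unavailable, or when some vCPU
 * number does not fit into the 64-bit processor_mask.
 */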
static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu **flush_pcpu;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);
        flush_pcpu = this_cpu_ptr(pcpu_flush);

        if (unlikely(!*flush_pcpu)) {
                struct page *pg = alloc_page(GFP_ATOMIC);

                /* Let a failed allocation fall through to the !flush check. */
                if (pg)
                        *flush_pcpu = page_address(pg);
        }

        flush = *flush_pcpu;
        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        if (vcpu >= 64) {
                                /* Re-enable interrupts before falling back. */
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }
        /*
         * We can flush no more than max_gvas with one hypercall. Flush the
         * whole address space if we were asked to do more.
         */
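        /* Assuming 4 KiB pages: (4096 - 24) / 8 == 509 gva_list entries. */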
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}
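/*
 * Extended variant for guests whose vCPU numbers may exceed 64: the
 * target processors are described with a sparse vp_set (banks of 64
 * vCPUs each) instead of a single 64-bit processor_mask.
 */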
static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex **flush_pcpu;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);
        flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

        if (unlikely(!*flush_pcpu)) {
                struct page *pg = alloc_page(GFP_ATOMIC);

                /* Let a failed allocation fall through to the !flush check. */
                if (pg)
                        *flush_pcpu = page_address(pg);
        }

        flush = *flush_pcpu;
        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }
        /*
         * We can flush no more than max_gvas with one hypercall. Flush the
         * whole address space if we were asked to do more.
         */
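        /*
         * Assuming 4 KiB pages and a single bank, this works out to
         * (4096 - 32 - 8) / 8 == 507 gva_list entries.
         */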
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                        gva_n, nr_bank, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}
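/*
 * Install the hypercall-based flush_tlb_others implementation, picking
 * the Ex variant when the hypervisor recommends extended processor
 * masks.
 */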
void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;
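        /*
         * The flush hypercalls identify an address space by the guest
         * physical address of its page-table root (see address_space
         * above), which presumably cannot describe PCID-tagged
         * translations, so PCID is disabled while they are in use.
         */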
        setup_clear_cpu_cap(X86_FEATURE_PCID);

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}
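/*
 * Allocate only the per-cpu pointer arrays here; the page-sized
 * hypercall input buffers behind them are allocated on first use in
 * the flush functions above.
 */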
void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
        else
                pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}