// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/time_namespace.h>

#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <clocksource/hyperv_timer.h>

#undef _ASM_X86_VVAR_H
#define EMIT_VVAR(name, offset)	\
	const size_t name ## _offset = offset;
#include <asm/vvar.h>

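/*
 * Locate the generic vdso_data inside a vvar page, using the
 * _vdso_data_offset constant emitted by EMIT_VVAR() above.
 */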
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page + _vdso_data_offset);
}
#undef EMIT_VVAR

unsigned int vclocks_used __read_mostly;

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

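/*
 * Sanity-check the image size and patch the boot-time instruction
 * alternatives recorded in the image's alternatives area.
 */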
void __init init_vdso_image(const struct vdso_image *image)
{
	BUG_ON(image->size % PAGE_SIZE != 0);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;

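/*
 * Fault handler for the [vdso] mapping: hand back the page of vdso
 * text that backs the faulting offset.
 */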
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;

	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
		return VM_FAULT_SIGBUS;

	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
	get_page(vmf->page);
	return 0;
}

static void vdso_fix_landing(const struct vdso_image *image,
		struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	if (in_ia32_syscall() && image == &vdso_image_32) {
		struct pt_regs *regs = current_pt_regs();
		unsigned long vdso_land = image->sym_int80_landing_pad;
		unsigned long old_land_addr = vdso_land +
			(unsigned long)current->mm->context.vdso;

		/* Fixing userspace landing - look at do_fast_syscall_32 */
		if (regs->ip == old_land_addr)
			regs->ip = new_vma->vm_start + vdso_land;
	}
#endif
}

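/*
 * mremap() callback for the [vdso] mapping: record the new location and,
 * for the 32-bit vdso, fix up a task whose IP still points into the old
 * int80 landing pad.
 */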
static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	const struct vdso_image *image = current->mm->context.vdso_image;

	vdso_fix_landing(image, new_vma);
	current->mm->context.vdso = (void __user *)new_vma->vm_start;

	return 0;
}

#ifdef CONFIG_TIME_NS
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	if (likely(vma->vm_mm == current->mm))
		return current->nsproxy->time_ns->vvar_page;

	/*
	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
	 * through interfaces like /proc/$pid/mem or
	 * process_vm_{readv,writev}() as long as there's no .access()
	 * in special_mapping_vmops().
	 * For more details check_vma_flags() and __access_remote_vm()
	 */
	WARN(1, "vvar_page accessed remotely");

	return NULL;
}

/*
 * The vvar page layout depends on whether a task belongs to the root or
 * non-root time namespace. Whenever a task changes its namespace, the VVAR
 * page tables are cleared, and then they will be re-faulted with a
 * corresponding layout.
 * See also the comment near timens_setup_vdso_data() for details.
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;

	mmap_read_lock(mm);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		unsigned long size = vma->vm_end - vma->vm_start;

		if (vma_is_special_mapping(vma, &vvar_mapping))
			zap_page_range(vma, vma->vm_start, size);
	}

	mmap_read_unlock(mm);
	return 0;
}
#else
static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	return NULL;
}
#endif

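/*
 * Fault handler for the [vvar] mapping: map in the data page that backs
 * the faulting offset - the vvar page itself or, when in use, the
 * pvclock, Hyper-V TSC, or time namespace page.
 */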
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
	unsigned long pfn;
	long sym_offset;

	if (!image)
		return VM_FAULT_SIGBUS;

	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
		image->sym_vvar_start;

	/*
	 * Sanity check: a symbol offset of zero means that the page
	 * does not exist for this vdso image, not that the page is at
	 * offset zero relative to the text mapping.  This should be
	 * impossible here, because sym_offset should only be zero for
	 * the page past the end of the vvar mapping.
	 */
	if (sym_offset == 0)
		return VM_FAULT_SIGBUS;

	if (sym_offset == image->sym_vvar_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;

		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the sym_vvar_page offset and
		 * the real VVAR page is mapped with the sym_timens_page
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (timens_page) {
			unsigned long addr;
			vm_fault_t err;

			/*
			 * Optimization: inside time namespace pre-fault
			 * VVAR page too. As on timens page there are only
			 * offsets for clocks on VVAR, it'll be faulted
			 * along with VVAR page.
			 */
			addr = vmf->address + (image->sym_timens_page - sym_offset);
			err = vmf_insert_pfn(vma, addr, pfn);
			if (unlikely(err & VM_FAULT_ERROR))
				return err;

			pfn = page_to_pfn(timens_page);
		}

		return vmf_insert_pfn(vma, vmf->address, pfn);
	} else if (sym_offset == image->sym_pvclock_page) {
		struct pvclock_vsyscall_time_info *pvti =
			pvclock_get_pvti_cpu0_va();
		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
			return vmf_insert_pfn_prot(vma, vmf->address,
					__pa(pvti) >> PAGE_SHIFT,
					pgprot_decrypted(vma->vm_page_prot));
		}
	} else if (sym_offset == image->sym_hvclock_page) {
		struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();

		if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
			return vmf_insert_pfn(vma, vmf->address,
					virt_to_phys(tsc_pg) >> PAGE_SHIFT);
	} else if (sym_offset == image->sym_timens_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		if (!timens_page)
			return VM_FAULT_SIGBUS;

		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
		return vmf_insert_pfn(vma, vmf->address, pfn);
	}

	return VM_FAULT_SIGBUS;
}

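/*
 * Special-mapping descriptors for the vdso text ("[vdso]") and the data
 * area ("[vvar]"), as the mappings appear in /proc/<pid>/maps.
 */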
static const struct vm_special_mapping vdso_mapping = {
	.name = "[vdso]",
	.fault = vdso_fault,
	.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,
};

/*
 * Add vdso and vvar mappings to current process.
 * @image          - blob to map
 * @addr           - request a specific address (zero to map at free addr)
 */
static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long text_start;
	int ret = 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &vdso_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
	} else {
		current->mm->context.vdso = (void __user *)text_start;
		current->mm->context.vdso_image = image;
	}

up_fail:
	mmap_write_unlock(mm);
	return ret;
}

#ifdef CONFIG_X86_64
/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset.  This way there is no hole in the middle of address space.
 * To save memory make sure it is still in the same PTE as the stack
 * top.  This doesn't give that many random bits.
 *
 * Note that this algorithm is imperfect: the distribution of the vdso
 * start address within a PMD is biased toward the end.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
	unsigned long addr, end;
	unsigned offset;

	/*
	 * Round up the start address.  It can start out unaligned as a result
	 * of stack start randomization.
	 */
	start = PAGE_ALIGN(start);

	/* Round the lowest possible end address up to a PMD boundary. */
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	if (end > start) {
		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
		addr = start + (offset << PAGE_SHIFT);
	} else {
		addr = start;
	}

	/*
	 * Forcibly align the final address in case we have a hardware
	 * issue that requires alignment for performance reasons.
	 */
	addr = align_vdso_addr(addr);

	return addr;
}

static int map_vdso_randomized(const struct vdso_image *image)
{
	unsigned long addr = vdso_addr(current->mm->start_stack,
				       image->size - image->sym_vvar_start);

	return map_vdso(image, addr);
}
#endif

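/*
 * Map the vdso at a caller-supplied address (e.g. for checkpoint/restore),
 * refusing if this mm already has a vdso or vvar mapping.
 */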
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	mmap_write_lock(mm);
	/*
	 * Check if we have already mapped vdso blob - fail to prevent
	 * abusing from userspace install_special_mapping, which may
	 * not do accounting and rlimit right.
	 * We could search vma near context.vdso, but it's slowpath,
	 * so let's explicitly check all VMAs to be completely sure.
	 */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma_is_special_mapping(vma, &vdso_mapping) ||
				vma_is_special_mapping(vma, &vvar_mapping)) {
			mmap_write_unlock(mm);
			return -EEXIST;
		}
	}
	mmap_write_unlock(mm);

	return map_vdso(image, addr);
}

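/* Map the 32-bit vdso for native 32-bit or IA32-emulated tasks. */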
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	return map_vdso(&vdso_image_32, 0);
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso_randomized(&vdso_image_64);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp, bool x32)
{
#ifdef CONFIG_X86_X32_ABI
	if (x32) {
		if (!vdso64_enabled)
			return 0;
		return map_vdso_randomized(&vdso_image_x32);
	}
#endif
#ifdef CONFIG_IA32_EMULATION
	return load_vdso32();
#else
	return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

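/*
 * Report whether regs->ip sits on one of the 32-bit vdso's sigreturn
 * landing pads, so that callers can treat the trap as a sigreturn.
 */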
bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
	const struct vdso_image *image = current->mm->context.vdso_image;
	unsigned long vdso = (unsigned long) current->mm->context.vdso;

	if (in_ia32_syscall() && image == &vdso_image_32) {
		if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad ||
		    regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
			return true;
	}
#endif
	return false;
}

#ifdef CONFIG_X86_64
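/* Parse the "vdso=" boot parameter: "vdso=0" disables the 64-bit vdso. */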
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 1;
}
__setup("vdso=", vdso_setup);

static int __init init_vdso(void)
{
	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);

	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	return 0;
}
subsys_initcall(init_vdso);
#endif