1
2
3
4
5
6
7
8#include <linux/mman.h>
9#include <linux/pagemap.h>
10#include <linux/syscalls.h>
11#include <linux/mempolicy.h>
12#include <linux/hugetlb.h>
13#include <linux/sched.h>
14#include <linux/ksm.h>
15
16
17
18
19
20
/*
 * Decide how madvise() must take mmap_sem for the given advice.
 *
 * Returns 0 when a read lock suffices (the advice only reads or drops
 * pages and never rewrites vma state), 1 when the caller must take
 * mmap_sem for writing (the advice may change vm_flags or split/merge
 * vmas).
 */
static int madvise_need_mmap_write(int behavior)
{
	if (behavior == MADV_REMOVE ||
	    behavior == MADV_WILLNEED ||
	    behavior == MADV_DONTNEED)
		return 0;

	/* be safe, default to 1. list exceptions explicitly */
	return 1;
}
33
34
35
36
37
/*
 * Apply a flag-changing advice (MADV_NORMAL/SEQUENTIAL/RANDOM/DONTFORK/
 * DOFORK/MERGEABLE/UNMERGEABLE) to [start, end) of @vma.  We can
 * potentially split the vm area into separate areas, each with its own
 * behavior.  On success *prev points at the vma now covering the range.
 * Called with mmap_sem held for writing (see madvise_need_mmap_write).
 */
static long madvise_behavior(struct vm_area_struct * vma,
		     struct vm_area_struct **prev,
		     unsigned long start, unsigned long end, int behavior)
{
	struct mm_struct * mm = vma->vm_mm;
	int error = 0;
	pgoff_t pgoff;
	unsigned long new_flags = vma->vm_flags;

	switch (behavior) {
	case MADV_NORMAL:
		/* Clear both readahead hints: back to default readahead. */
		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
		break;
	case MADV_SEQUENTIAL:
		new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
		break;
	case MADV_RANDOM:
		new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
		break;
	case MADV_DONTFORK:
		new_flags |= VM_DONTCOPY;
		break;
	case MADV_DOFORK:
		if (vma->vm_flags & VM_IO) {
			/* Re-enabling copy-on-fork is refused for VM_IO
			 * mappings. */
			error = -EINVAL;
			goto out;
		}
		new_flags &= ~VM_DONTCOPY;
		break;
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
		/* KSM decides whether VM_MERGEABLE gets set/cleared. */
		error = ksm_madvise(vma, start, end, behavior, &new_flags);
		if (error)
			goto out;
		break;
	}

	if (new_flags == vma->vm_flags) {
		/* Flags unchanged: no split or merge needed. */
		*prev = vma;
		goto out;
	}

	/* Try to merge the re-flagged range with its neighbours first. */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
				vma->vm_file, pgoff, vma_policy(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	*prev = vma;

	/* No merge: split off the head and/or tail so the new flags
	 * apply to exactly [start, end). */
	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto out;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto out;
	}

success:
	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 */
	vma->vm_flags = new_flags;

out:
	/* madvise() reports transient resource shortage as EAGAIN. */
	if (error == -ENOMEM)
		error = -EAGAIN;
	return error;
}
113
114
115
116
/*
 * MADV_WILLNEED: schedule all required I/O operations to pre-populate
 * the page cache for [start, end).  Does not wait for completion.
 */
static long madvise_willneed(struct vm_area_struct * vma,
			     struct vm_area_struct ** prev,
			     unsigned long start, unsigned long end)
{
	struct file *file = vma->vm_file;

	/* Readahead only makes sense for file-backed mappings. */
	if (!file)
		return -EBADF;

	if (file->f_mapping->a_ops->get_xip_mem) {
		/* XIP mappings have no page cache: no bad return value,
		 * but ignore the advice. */
		return 0;
	}

	*prev = vma;
	/* Convert the byte range into page offsets within the file,
	 * clamping end to this vma. */
	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	if (end > vma->vm_end)
		end = vma->vm_end;
	end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

	force_page_cache_readahead(file->f_mapping, file, start, end - start);
	return 0;
}
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/*
 * MADV_DONTNEED: the application no longer needs these pages.  Zap the
 * page range so the pages (and their backing, for anonymous memory) can
 * be freed; subsequent access will fault them back in as needed.
 */
static long madvise_dontneed(struct vm_area_struct * vma,
			     struct vm_area_struct ** prev,
			     unsigned long start, unsigned long end)
{
	*prev = vma;
	/* Discarding pages from locked, hugetlb or raw-pfn mappings is
	 * not supported. */
	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
		return -EINVAL;

	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
		/* Nonlinear vmas need zap_details so the whole file
		 * range is considered. */
		struct zap_details details = {
			.nonlinear_vma = vma,
			.last_index = ULONG_MAX,
		};
		zap_page_range(vma, start, end - start, &details);
	} else
		zap_page_range(vma, start, end - start, NULL);
	return 0;
}
178
179
180
181
182
183
184
185
186static long madvise_remove(struct vm_area_struct *vma,
187 struct vm_area_struct **prev,
188 unsigned long start, unsigned long end)
189{
190 struct address_space *mapping;
191 loff_t offset, endoff;
192 int error;
193
194 *prev = NULL;
195
196 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
197 return -EINVAL;
198
199 if (!vma->vm_file || !vma->vm_file->f_mapping
200 || !vma->vm_file->f_mapping->host) {
201 return -EINVAL;
202 }
203
204 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
205 return -EACCES;
206
207 mapping = vma->vm_file->f_mapping;
208
209 offset = (loff_t)(start - vma->vm_start)
210 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
211 endoff = (loff_t)(end - vma->vm_start - 1)
212 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
213
214
215 up_read(¤t->mm->mmap_sem);
216 error = vmtruncate_range(mapping->host, offset, endoff);
217 down_read(¤t->mm->mmap_sem);
218 return error;
219}
220
221#ifdef CONFIG_MEMORY_FAILURE
222
223
224
225static int madvise_hwpoison(unsigned long start, unsigned long end)
226{
227 int ret = 0;
228
229 if (!capable(CAP_SYS_ADMIN))
230 return -EPERM;
231 for (; start < end; start += PAGE_SIZE) {
232 struct page *p;
233 int ret = get_user_pages(current, current->mm, start, 1,
234 0, 0, &p, NULL);
235 if (ret != 1)
236 return ret;
237 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
238 page_to_pfn(p), start);
239
240 __memory_failure(page_to_pfn(p), 0, 1);
241 put_page(p);
242 }
243 return ret;
244}
245#endif
246
247static long
248madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
249 unsigned long start, unsigned long end, int behavior)
250{
251 switch (behavior) {
252 case MADV_REMOVE:
253 return madvise_remove(vma, prev, start, end);
254 case MADV_WILLNEED:
255 return madvise_willneed(vma, prev, start, end);
256 case MADV_DONTNEED:
257 return madvise_dontneed(vma, prev, start, end);
258 default:
259 return madvise_behavior(vma, prev, start, end, behavior);
260 }
261}
262
/*
 * Report whether @behavior is an advice value this kernel understands.
 * Returns 1 for a known advice, 0 otherwise.  The KSM advices are only
 * accepted when CONFIG_KSM is built in.
 */
static int
madvise_behavior_valid(int behavior)
{
	if (behavior == MADV_DOFORK || behavior == MADV_DONTFORK ||
	    behavior == MADV_NORMAL || behavior == MADV_SEQUENTIAL ||
	    behavior == MADV_RANDOM || behavior == MADV_REMOVE ||
	    behavior == MADV_WILLNEED || behavior == MADV_DONTNEED)
		return 1;
#ifdef CONFIG_KSM
	if (behavior == MADV_MERGEABLE || behavior == MADV_UNMERGEABLE)
		return 1;
#endif
	return 0;
}
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
329{
330 unsigned long end, tmp;
331 struct vm_area_struct * vma, *prev;
332 int unmapped_error = 0;
333 int error = -EINVAL;
334 int write;
335 size_t len;
336
337#ifdef CONFIG_MEMORY_FAILURE
338 if (behavior == MADV_HWPOISON)
339 return madvise_hwpoison(start, start+len_in);
340#endif
341 if (!madvise_behavior_valid(behavior))
342 return error;
343
344 write = madvise_need_mmap_write(behavior);
345 if (write)
346 down_write(¤t->mm->mmap_sem);
347 else
348 down_read(¤t->mm->mmap_sem);
349
350 if (start & ~PAGE_MASK)
351 goto out;
352 len = (len_in + ~PAGE_MASK) & PAGE_MASK;
353
354
355 if (len_in && !len)
356 goto out;
357
358 end = start + len;
359 if (end < start)
360 goto out;
361
362 error = 0;
363 if (end == start)
364 goto out;
365
366
367
368
369
370
371 vma = find_vma_prev(current->mm, start, &prev);
372 if (vma && start > vma->vm_start)
373 prev = vma;
374
375 for (;;) {
376
377 error = -ENOMEM;
378 if (!vma)
379 goto out;
380
381
382 if (start < vma->vm_start) {
383 unmapped_error = -ENOMEM;
384 start = vma->vm_start;
385 if (start >= end)
386 goto out;
387 }
388
389
390 tmp = vma->vm_end;
391 if (end < tmp)
392 tmp = end;
393
394
395 error = madvise_vma(vma, &prev, start, tmp, behavior);
396 if (error)
397 goto out;
398 start = tmp;
399 if (prev && start < prev->vm_end)
400 start = prev->vm_end;
401 error = unmapped_error;
402 if (start >= end)
403 goto out;
404 if (prev)
405 vma = prev->vm_next;
406 else
407 vma = find_vma(current->mm, start);
408 }
409out:
410 if (write)
411 up_write(¤t->mm->mmap_sem);
412 else
413 up_read(¤t->mm->mmap_sem);
414
415 return error;
416}
417