/*
 *	linux/mm/madvise.c
 *
 * Copyright (C) 1999  Linus Torvalds
 * Copyright (C) 2002  Christoph Hellwig
 */
8#include <linux/mman.h>
9#include <linux/pagemap.h>
10#include <linux/syscalls.h>
11#include <linux/mempolicy.h>
12#include <linux/page-isolation.h>
13#include <linux/hugetlb.h>
14#include <linux/sched.h>
15#include <linux/ksm.h>
16
17
18
19
20
21
22static int madvise_need_mmap_write(int behavior)
23{
24 switch (behavior) {
25 case MADV_REMOVE:
26 case MADV_WILLNEED:
27 case MADV_DONTNEED:
28 return 0;
29 default:
30
31 return 1;
32 }
33}
34
35
36
37
38
/*
 * Apply an madvise behavior to [start, end) of @vma by updating its
 * vm_flags, merging with neighbours or splitting the VMA as needed.
 *
 * @vma:      the VMA the range falls in
 * @prev:     in/out; on success points at the VMA covering @start
 * @start:    start address (page aligned, within @vma)
 * @end:      end address (page aligned, <= vma->vm_end)
 * @behavior: MADV_* value already validated by the caller
 *
 * Returns 0 on success or a negative errno.  Caller holds mmap_sem
 * for writing (see madvise_need_mmap_write()).
 */
static long madvise_behavior(struct vm_area_struct * vma,
		     struct vm_area_struct **prev,
		     unsigned long start, unsigned long end, int behavior)
{
	struct mm_struct * mm = vma->vm_mm;
	int error = 0;
	pgoff_t pgoff;
	unsigned long new_flags = vma->vm_flags;

	/* Translate the advice into the vm_flags the VMA should carry. */
	switch (behavior) {
	case MADV_NORMAL:
		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
		break;
	case MADV_SEQUENTIAL:
		new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
		break;
	case MADV_RANDOM:
		new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
		break;
	case MADV_DONTFORK:
		new_flags |= VM_DONTCOPY;
		break;
	case MADV_DOFORK:
		/* VM_IO mappings must never be copied into a child. */
		if (vma->vm_flags & VM_IO) {
			error = -EINVAL;
			goto out;
		}
		new_flags &= ~VM_DONTCOPY;
		break;
	case MADV_DONTDUMP:
		new_flags |= VM_NODUMP;
		break;
	case MADV_DODUMP:
		new_flags &= ~VM_NODUMP;
		break;
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
		/* KSM decides itself which flag bits to set in new_flags. */
		error = ksm_madvise(vma, start, end, behavior, &new_flags);
		if (error)
			goto out;
		break;
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
		error = hugepage_madvise(vma, &new_flags, behavior);
		if (error)
			goto out;
		break;
	}

	/* Nothing to change: the VMA already carries the requested flags. */
	if (new_flags == vma->vm_flags) {
		*prev = vma;
		goto out;
	}

	/* First try to merge the re-flagged range into a neighbour. */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
				vma->vm_file, pgoff, vma_policy(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	*prev = vma;

	/* No merge possible: split off the head and/or tail so that only
	 * [start, end) gets the new flags. */
	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto out;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto out;
	}

success:
	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 */
	vma->vm_flags = new_flags;

out:
	/* madvise(2) reports transient allocation failure as EAGAIN. */
	if (error == -ENOMEM)
		error = -EAGAIN;
	return error;
}
126
127
128
129
130static long madvise_willneed(struct vm_area_struct * vma,
131 struct vm_area_struct ** prev,
132 unsigned long start, unsigned long end)
133{
134 struct file *file = vma->vm_file;
135
136 if (!file)
137 return -EBADF;
138
139 if (file->f_mapping->a_ops->get_xip_mem) {
140
141 return 0;
142 }
143
144 *prev = vma;
145 start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
146 if (end > vma->vm_end)
147 end = vma->vm_end;
148 end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
149
150 force_page_cache_readahead(file->f_mapping, file, start, end - start);
151 return 0;
152}
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173static long madvise_dontneed(struct vm_area_struct * vma,
174 struct vm_area_struct ** prev,
175 unsigned long start, unsigned long end)
176{
177 *prev = vma;
178 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
179 return -EINVAL;
180
181 if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
182 struct zap_details details = {
183 .nonlinear_vma = vma,
184 .last_index = ULONG_MAX,
185 };
186 zap_page_range(vma, start, end - start, &details);
187 } else
188 zap_page_range(vma, start, end - start, NULL);
189 return 0;
190}
191
192
193
194
195
196
197
198
199static long madvise_remove(struct vm_area_struct *vma,
200 struct vm_area_struct **prev,
201 unsigned long start, unsigned long end)
202{
203 struct address_space *mapping;
204 loff_t offset, endoff;
205 int error;
206
207 *prev = NULL;
208
209 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
210 return -EINVAL;
211
212 if (!vma->vm_file || !vma->vm_file->f_mapping
213 || !vma->vm_file->f_mapping->host) {
214 return -EINVAL;
215 }
216
217 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
218 return -EACCES;
219
220 mapping = vma->vm_file->f_mapping;
221
222 offset = (loff_t)(start - vma->vm_start)
223 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
224 endoff = (loff_t)(end - vma->vm_start - 1)
225 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
226
227
228 up_read(¤t->mm->mmap_sem);
229 error = vmtruncate_range(mapping->host, offset, endoff);
230 down_read(¤t->mm->mmap_sem);
231 return error;
232}
233
234#ifdef CONFIG_MEMORY_FAILURE
235
236
237
238static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
239{
240 int ret = 0;
241
242 if (!capable(CAP_SYS_ADMIN))
243 return -EPERM;
244 for (; start < end; start += PAGE_SIZE) {
245 struct page *p;
246 int ret = get_user_pages_fast(start, 1, 0, &p);
247 if (ret != 1)
248 return ret;
249 if (bhv == MADV_SOFT_OFFLINE) {
250 printk(KERN_INFO "Soft offlining page %lx at %lx\n",
251 page_to_pfn(p), start);
252 ret = soft_offline_page(p, MF_COUNT_INCREASED);
253 if (ret)
254 break;
255 continue;
256 }
257 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
258 page_to_pfn(p), start);
259
260 memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
261 }
262 return ret;
263}
264#endif
265
266static long
267madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
268 unsigned long start, unsigned long end, int behavior)
269{
270 switch (behavior) {
271 case MADV_REMOVE:
272 return madvise_remove(vma, prev, start, end);
273 case MADV_WILLNEED:
274 return madvise_willneed(vma, prev, start, end);
275 case MADV_DONTNEED:
276 return madvise_dontneed(vma, prev, start, end);
277 default:
278 return madvise_behavior(vma, prev, start, end, behavior);
279 }
280}
281
282static int
283madvise_behavior_valid(int behavior)
284{
285 switch (behavior) {
286 case MADV_DOFORK:
287 case MADV_DONTFORK:
288 case MADV_NORMAL:
289 case MADV_SEQUENTIAL:
290 case MADV_RANDOM:
291 case MADV_REMOVE:
292 case MADV_WILLNEED:
293 case MADV_DONTNEED:
294#ifdef CONFIG_KSM
295 case MADV_MERGEABLE:
296 case MADV_UNMERGEABLE:
297#endif
298#ifdef CONFIG_TRANSPARENT_HUGEPAGE
299 case MADV_HUGEPAGE:
300 case MADV_NOHUGEPAGE:
301#endif
302 case MADV_DONTDUMP:
303 case MADV_DODUMP:
304 return 1;
305
306 default:
307 return 0;
308 }
309}
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
354{
355 unsigned long end, tmp;
356 struct vm_area_struct * vma, *prev;
357 int unmapped_error = 0;
358 int error = -EINVAL;
359 int write;
360 size_t len;
361
362#ifdef CONFIG_MEMORY_FAILURE
363 if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
364 return madvise_hwpoison(behavior, start, start+len_in);
365#endif
366 if (!madvise_behavior_valid(behavior))
367 return error;
368
369 write = madvise_need_mmap_write(behavior);
370 if (write)
371 down_write(¤t->mm->mmap_sem);
372 else
373 down_read(¤t->mm->mmap_sem);
374
375 if (start & ~PAGE_MASK)
376 goto out;
377 len = (len_in + ~PAGE_MASK) & PAGE_MASK;
378
379
380 if (len_in && !len)
381 goto out;
382
383 end = start + len;
384 if (end < start)
385 goto out;
386
387 error = 0;
388 if (end == start)
389 goto out;
390
391
392
393
394
395
396 vma = find_vma_prev(current->mm, start, &prev);
397 if (vma && start > vma->vm_start)
398 prev = vma;
399
400 for (;;) {
401
402 error = -ENOMEM;
403 if (!vma)
404 goto out;
405
406
407 if (start < vma->vm_start) {
408 unmapped_error = -ENOMEM;
409 start = vma->vm_start;
410 if (start >= end)
411 goto out;
412 }
413
414
415 tmp = vma->vm_end;
416 if (end < tmp)
417 tmp = end;
418
419
420 error = madvise_vma(vma, &prev, start, tmp, behavior);
421 if (error)
422 goto out;
423 start = tmp;
424 if (prev && start < prev->vm_end)
425 start = prev->vm_end;
426 error = unmapped_error;
427 if (start >= end)
428 goto out;
429 if (prev)
430 vma = prev->vm_next;
431 else
432 vma = find_vma(current->mm, start);
433 }
434out:
435 if (write)
436 up_write(¤t->mm->mmap_sem);
437 else
438 up_read(¤t->mm->mmap_sem);
439
440 return error;
441}
442