/*
 *	linux/mm/madvise.c
 *
 * Copyright (C) 1999  Linus Torvalds
 * Copyright (C) 2002  Christoph Hellwig
 */
8#include <linux/mman.h>
9#include <linux/pagemap.h>
10#include <linux/syscalls.h>
11#include <linux/mempolicy.h>
12#include <linux/page-isolation.h>
13#include <linux/hugetlb.h>
14#include <linux/sched.h>
15#include <linux/ksm.h>
16
17
18
19
20
21
22static int madvise_need_mmap_write(int behavior)
23{
24 switch (behavior) {
25 case MADV_REMOVE:
26 case MADV_WILLNEED:
27 case MADV_DONTNEED:
28 return 0;
29 default:
30
31 return 1;
32 }
33}
34
35
36
37
38
/*
 * Apply a flag-changing madvise behavior (everything except MADV_REMOVE,
 * MADV_WILLNEED and MADV_DONTNEED): compute the new vm_flags, then merge
 * with neighbours or split the vma so that exactly [start, end) carries
 * the new flags.  Caller holds mmap_sem for writing (see
 * madvise_need_mmap_write()).
 *
 * Returns 0 on success or a negative errno; -ENOMEM from vma splitting is
 * reported to userspace as -EAGAIN.  On success *prev is the vma that now
 * covers the range.
 */
static long madvise_behavior(struct vm_area_struct * vma,
		     struct vm_area_struct **prev,
		     unsigned long start, unsigned long end, int behavior)
{
	struct mm_struct * mm = vma->vm_mm;
	int error = 0;
	pgoff_t pgoff;
	unsigned long new_flags = vma->vm_flags;

	switch (behavior) {
	case MADV_NORMAL:
		/* Default readahead: drop both the random and sequential hints. */
		new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
		break;
	case MADV_SEQUENTIAL:
		new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
		break;
	case MADV_RANDOM:
		new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
		break;
	case MADV_DONTFORK:
		new_flags |= VM_DONTCOPY;
		break;
	case MADV_DOFORK:
		if (vma->vm_flags & VM_IO) {
			/* VM_IO mappings are never copied across fork(). */
			error = -EINVAL;
			goto out;
		}
		new_flags &= ~VM_DONTCOPY;
		break;
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
		/* KSM updates new_flags (VM_MERGEABLE) itself. */
		error = ksm_madvise(vma, start, end, behavior, &new_flags);
		if (error)
			goto out;
		break;
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
		/* THP updates new_flags (VM_HUGEPAGE/VM_NOHUGEPAGE) itself. */
		error = hugepage_madvise(vma, &new_flags, behavior);
		if (error)
			goto out;
		break;
	}

	if (new_flags == vma->vm_flags) {
		/* Nothing changed; no merge or split necessary. */
		*prev = vma;
		goto out;
	}

	/* File offset of 'start' within the mapping, for vma_merge(). */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
				vma->vm_file, pgoff, vma_policy(vma));
	if (*prev) {
		/* Merged with a neighbour; the merged vma replaces ours. */
		vma = *prev;
		goto success;
	}

	*prev = vma;

	/* Trim the vma so it covers exactly [start, end). */
	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto out;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto out;
	}

success:
	/* vm_flags is protected by mmap_sem held in write mode. */
	vma->vm_flags = new_flags;

out:
	/* split_vma()'s -ENOMEM is transient; tell userspace to retry. */
	if (error == -ENOMEM)
		error = -EAGAIN;
	return error;
}
120
121
122
123
124static long madvise_willneed(struct vm_area_struct * vma,
125 struct vm_area_struct ** prev,
126 unsigned long start, unsigned long end)
127{
128 struct file *file = vma->vm_file;
129
130 if (!file)
131 return -EBADF;
132
133 if (file->f_mapping->a_ops->get_xip_mem) {
134
135 return 0;
136 }
137
138 *prev = vma;
139 start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
140 if (end > vma->vm_end)
141 end = vma->vm_end;
142 end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
143
144 force_page_cache_readahead(file->f_mapping, file, start, end - start);
145 return 0;
146}
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167static long madvise_dontneed(struct vm_area_struct * vma,
168 struct vm_area_struct ** prev,
169 unsigned long start, unsigned long end)
170{
171 *prev = vma;
172 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
173 return -EINVAL;
174
175 if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
176 struct zap_details details = {
177 .nonlinear_vma = vma,
178 .last_index = ULONG_MAX,
179 };
180 zap_page_range(vma, start, end - start, &details);
181 } else
182 zap_page_range(vma, start, end - start, NULL);
183 return 0;
184}
185
186
187
188
189
190
191
192
193static long madvise_remove(struct vm_area_struct *vma,
194 struct vm_area_struct **prev,
195 unsigned long start, unsigned long end)
196{
197 struct address_space *mapping;
198 loff_t offset, endoff;
199 int error;
200
201 *prev = NULL;
202
203 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
204 return -EINVAL;
205
206 if (!vma->vm_file || !vma->vm_file->f_mapping
207 || !vma->vm_file->f_mapping->host) {
208 return -EINVAL;
209 }
210
211 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
212 return -EACCES;
213
214 mapping = vma->vm_file->f_mapping;
215
216 offset = (loff_t)(start - vma->vm_start)
217 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
218 endoff = (loff_t)(end - vma->vm_start - 1)
219 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
220
221
222 up_read(¤t->mm->mmap_sem);
223 error = vmtruncate_range(mapping->host, offset, endoff);
224 down_read(¤t->mm->mmap_sem);
225 return error;
226}
227
228#ifdef CONFIG_MEMORY_FAILURE
229
230
231
232static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
233{
234 int ret = 0;
235
236 if (!capable(CAP_SYS_ADMIN))
237 return -EPERM;
238 for (; start < end; start += PAGE_SIZE) {
239 struct page *p;
240 int ret = get_user_pages_fast(start, 1, 0, &p);
241 if (ret != 1)
242 return ret;
243 if (bhv == MADV_SOFT_OFFLINE) {
244 printk(KERN_INFO "Soft offlining page %lx at %lx\n",
245 page_to_pfn(p), start);
246 ret = soft_offline_page(p, MF_COUNT_INCREASED);
247 if (ret)
248 break;
249 continue;
250 }
251 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
252 page_to_pfn(p), start);
253
254 __memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
255 }
256 return ret;
257}
258#endif
259
260static long
261madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
262 unsigned long start, unsigned long end, int behavior)
263{
264 switch (behavior) {
265 case MADV_REMOVE:
266 return madvise_remove(vma, prev, start, end);
267 case MADV_WILLNEED:
268 return madvise_willneed(vma, prev, start, end);
269 case MADV_DONTNEED:
270 return madvise_dontneed(vma, prev, start, end);
271 default:
272 return madvise_behavior(vma, prev, start, end, behavior);
273 }
274}
275
276static int
277madvise_behavior_valid(int behavior)
278{
279 switch (behavior) {
280 case MADV_DOFORK:
281 case MADV_DONTFORK:
282 case MADV_NORMAL:
283 case MADV_SEQUENTIAL:
284 case MADV_RANDOM:
285 case MADV_REMOVE:
286 case MADV_WILLNEED:
287 case MADV_DONTNEED:
288#ifdef CONFIG_KSM
289 case MADV_MERGEABLE:
290 case MADV_UNMERGEABLE:
291#endif
292#ifdef CONFIG_TRANSPARENT_HUGEPAGE
293 case MADV_HUGEPAGE:
294 case MADV_NOHUGEPAGE:
295#endif
296 return 1;
297
298 default:
299 return 0;
300 }
301}
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
346{
347 unsigned long end, tmp;
348 struct vm_area_struct * vma, *prev;
349 int unmapped_error = 0;
350 int error = -EINVAL;
351 int write;
352 size_t len;
353
354#ifdef CONFIG_MEMORY_FAILURE
355 if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
356 return madvise_hwpoison(behavior, start, start+len_in);
357#endif
358 if (!madvise_behavior_valid(behavior))
359 return error;
360
361 write = madvise_need_mmap_write(behavior);
362 if (write)
363 down_write(¤t->mm->mmap_sem);
364 else
365 down_read(¤t->mm->mmap_sem);
366
367 if (start & ~PAGE_MASK)
368 goto out;
369 len = (len_in + ~PAGE_MASK) & PAGE_MASK;
370
371
372 if (len_in && !len)
373 goto out;
374
375 end = start + len;
376 if (end < start)
377 goto out;
378
379 error = 0;
380 if (end == start)
381 goto out;
382
383
384
385
386
387
388 vma = find_vma_prev(current->mm, start, &prev);
389 if (vma && start > vma->vm_start)
390 prev = vma;
391
392 for (;;) {
393
394 error = -ENOMEM;
395 if (!vma)
396 goto out;
397
398
399 if (start < vma->vm_start) {
400 unmapped_error = -ENOMEM;
401 start = vma->vm_start;
402 if (start >= end)
403 goto out;
404 }
405
406
407 tmp = vma->vm_end;
408 if (end < tmp)
409 tmp = end;
410
411
412 error = madvise_vma(vma, &prev, start, tmp, behavior);
413 if (error)
414 goto out;
415 start = tmp;
416 if (prev && start < prev->vm_end)
417 start = prev->vm_end;
418 error = unmapped_error;
419 if (start >= end)
420 goto out;
421 if (prev)
422 vma = prev->vm_next;
423 else
424 vma = find_vma(current->mm, start);
425 }
426out:
427 if (write)
428 up_write(¤t->mm->mmap_sem);
429 else
430 up_read(¤t->mm->mmap_sem);
431
432 return error;
433}
434