/*
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>

#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/page.h>

/*
 * balloon_process() state:
 *
 * BP_DONE: done or nothing to do,
 * BP_EAGAIN: error, go to sleep,
 * BP_ECANCELED: error, balloon operation canceled.
 */
enum bp_state {
	BP_DONE,
	BP_EAGAIN,
	BP_ECANCELED
};


static DEFINE_MUTEX(balloon_mutex);

struct balloon_stats balloon_stats;
EXPORT_SYMBOL_GPL(balloon_stats);

/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);

/*
 * When ballooning out (allocating memory to return to Xen) we don't really
 * want the kernel to try too hard since that can trigger the oom killer.
 */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)

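/*
 * Scrub a page before its frame is handed back to the hypervisor so that
 * no guest data can leak out of the domain.  Compiled out unless
 * CONFIG_XEN_SCRUB_PAGES is enabled.
 */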
static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
	clear_highpage(page);
#endif
}

/* balloon_append: add the given page to the balloon. */
static void __balloon_append(struct page *page)
{
	/* Lowmem is re-populated first, so highmem pages go at list tail. */
	if (PageHighMem(page)) {
		list_add_tail(&page->lru, &ballooned_pages);
		balloon_stats.balloon_high++;
	} else {
		list_add(&page->lru, &ballooned_pages);
		balloon_stats.balloon_low++;
	}
}

static void balloon_append(struct page *page)
{
	__balloon_append(page);
	adjust_managed_page_count(page, -1);
}

/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(bool prefer_highmem)
{
	struct page *page;

	if (list_empty(&ballooned_pages))
		return NULL;

	if (prefer_highmem)
		page = list_entry(ballooned_pages.prev, struct page, lru);
	else
		page = list_entry(ballooned_pages.next, struct page, lru);
	list_del(&page->lru);

	if (PageHighMem(page))
		balloon_stats.balloon_high--;
	else
		balloon_stats.balloon_low--;

	adjust_managed_page_count(page, 1);

	return page;
}

static struct page *balloon_first_page(void)
{
	if (list_empty(&ballooned_pages))
		return NULL;
	return list_entry(ballooned_pages.next, struct page, lru);
}

static struct page *balloon_next_page(struct page *page)
{
	struct list_head *next = page->lru.next;
	if (next == &ballooned_pages)
		return NULL;
	return list_entry(next, struct page, lru);
}

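/*
 * Decide whether to retry after a ballooning attempt: reset the backoff on
 * success, double schedule_delay (capped at max_schedule_delay) on BP_EAGAIN,
 * and give up with BP_ECANCELED once max_retry_count is exceeded.
 */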
static enum bp_state update_schedule(enum bp_state state)
{
	if (state == BP_DONE) {
		balloon_stats.schedule_delay = 1;
		balloon_stats.retry_count = 1;
		return BP_DONE;
	}

	++balloon_stats.retry_count;

	if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
	    balloon_stats.retry_count > balloon_stats.max_retry_count) {
		balloon_stats.schedule_delay = 1;
		balloon_stats.retry_count = 1;
		return BP_ECANCELED;
	}

	balloon_stats.schedule_delay <<= 1;

	if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
		balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;

	return BP_EAGAIN;
}

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
static long current_credit(void)
{
	return balloon_stats.target_pages - balloon_stats.current_pages -
		balloon_stats.hotplug_pages;
}

static bool balloon_is_inflated(void)
{
	if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
	    balloon_stats.balloon_hotplug)
		return true;
	else
		return false;
}

/*
 * reserve_additional_memory() adds a memory region of size >= credit above
 * max_pfn.  The new region is section aligned and its size is rounded up to
 * a multiple of the section size, which keeps the hotplugged range properly
 * aligned and makes best use of the address space.  The pages only become
 * usable once they have been onlined (from user space, e.g. by udev), so
 * the real amount of added memory is established at page-onlining time.
 */
static enum bp_state reserve_additional_memory(long credit)
{
	int nid, rc;
	u64 hotplug_start_paddr;
	unsigned long balloon_hotplug = credit;

	hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
	balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
	nid = memory_add_physaddr_to_nid(hotplug_start_paddr);

	rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);

	if (rc) {
		pr_info("%s: add_memory() failed: %i\n", __func__, rc);
		return BP_EAGAIN;
	}

	balloon_hotplug -= credit;

	balloon_stats.hotplug_pages += credit;
	balloon_stats.balloon_hotplug = balloon_hotplug;

	return BP_DONE;
}

static void xen_online_page(struct page *page)
{
	__online_page_set_limits(page);

	mutex_lock(&balloon_mutex);

	__balloon_append(page);

	if (balloon_stats.hotplug_pages)
		--balloon_stats.hotplug_pages;
	else
		--balloon_stats.balloon_hotplug;

	mutex_unlock(&balloon_mutex);
}

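/*
 * Hotplugged sections are onlined from user space (e.g. by udev); once
 * memory comes online, kick the worker so the new pages can be put to
 * use towards the current target.
 */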
static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
	if (val == MEM_ONLINE)
		schedule_delayed_work(&balloon_worker, 0);

	return NOTIFY_OK;
}

static struct notifier_block xen_memory_nb = {
	.notifier_call = xen_memory_notifier,
	.priority = 0
};
#else
static long current_credit(void)
{
	unsigned long target = balloon_stats.target_pages;

	target = min(target,
		     balloon_stats.current_pages +
		     balloon_stats.balloon_low +
		     balloon_stats.balloon_high);

	return target - balloon_stats.current_pages;
}

static bool balloon_is_inflated(void)
{
	if (balloon_stats.balloon_low || balloon_stats.balloon_high)
		return true;
	else
		return false;
}

static enum bp_state reserve_additional_memory(long credit)
{
	balloon_stats.target_pages = balloon_stats.current_pages;
	return BP_DONE;
}
#endif

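/*
 * Ask Xen to populate machine frames for up to nr_pages ballooned PFNs
 * (one frame_list batch per call), restore their P2M entries and, for PV
 * lowmem pages, their kernel mappings, then release the pages back to the
 * page allocator.
 */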
static enum bp_state increase_reservation(unsigned long nr_pages)
{
	int rc;
	unsigned long pfn, i;
	struct page *page;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
	if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
		nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
		balloon_stats.hotplug_pages += nr_pages;
		balloon_stats.balloon_hotplug -= nr_pages;
		return BP_DONE;
	}
#endif

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		if (!page) {
			nr_pages = i;
			break;
		}
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc <= 0)
		return BP_EAGAIN;

	for (i = 0; i < rc; i++) {
		page = balloon_retrieve(false);
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		set_phys_to_machine(pfn, frame_list[i]);

#ifdef CONFIG_XEN_HAVE_PVMMU
		/* Link back into the page tables if not highmem. */
		if (xen_pv_domain() && !PageHighMem(page)) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				mfn_pte(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}
#endif

		/* Relinquish the page back to the allocator. */
		__free_reserved_page(page);
	}

	balloon_stats.current_pages += rc;

	return BP_DONE;
}

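/*
 * Allocate nr_pages pages, unmap them, hand the underlying frames back to
 * Xen via XENMEM_decrease_reservation, and keep the struct pages on the
 * ballooned_pages list so they can be re-populated later.
 */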
static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
{
	enum bp_state state = BP_DONE;
	unsigned long pfn, i;
	struct page *page;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
	if (balloon_stats.hotplug_pages) {
		nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
		balloon_stats.hotplug_pages -= nr_pages;
		balloon_stats.balloon_hotplug += nr_pages;
		return BP_DONE;
	}
#endif

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp);
		if (page == NULL) {
			nr_pages = i;
			state = BP_EAGAIN;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		scrub_page(page);

#ifdef CONFIG_XEN_HAVE_PVMMU
		if (xen_pv_domain() && !PageHighMem(page)) {
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				__pte_ma(0), 0);
			BUG_ON(ret);
		}
#endif
	}

	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	balloon_stats.current_pages -= nr_pages;

	return state;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(struct work_struct *work)
{
	enum bp_state state = BP_DONE;
	long credit;

	mutex_lock(&balloon_mutex);

	do {
		credit = current_credit();

		if (credit > 0) {
			if (balloon_is_inflated())
				state = increase_reservation(credit);
			else
				state = reserve_additional_memory(credit);
		}

		if (credit < 0)
			state = decrease_reservation(-credit, GFP_BALLOON);

		state = update_schedule(state);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while (credit && state == BP_DONE);

	/* Schedule more work if there is some still to be done. */
	if (state == BP_EAGAIN)
		schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);

	mutex_unlock(&balloon_mutex);
}

/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	balloon_stats.target_pages = target;
	schedule_delayed_work(&balloon_worker, 0);
}
EXPORT_SYMBOL_GPL(balloon_set_new_target);

/**
 * alloc_xenballooned_pages - get pages that have been ballooned out
 * @nr_pages: Number of pages to get
 * @pages: pages returned
 * @highmem: allow highmem pages
 * @return 0 on success, error otherwise
 */
int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
{
	int pgno = 0;
	struct page *page;
	mutex_lock(&balloon_mutex);
	while (pgno < nr_pages) {
		page = balloon_retrieve(highmem);
		if (page && (highmem || !PageHighMem(page))) {
			pages[pgno++] = page;
		} else {
			enum bp_state st;
			if (page)
				balloon_append(page);
			st = decrease_reservation(nr_pages - pgno,
					highmem ? GFP_HIGHUSER : GFP_USER);
			if (st != BP_DONE)
				goto out_undo;
		}
	}
	mutex_unlock(&balloon_mutex);
	return 0;
 out_undo:
	while (pgno)
		balloon_append(pages[--pgno]);
	/* Free the memory back to the kernel soon */
	schedule_delayed_work(&balloon_worker, 0);
	mutex_unlock(&balloon_mutex);
	return -ENOMEM;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);
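/*
 * Illustrative sketch (not code from this driver): a backend that needs
 * frame slots to map foreign memory into might use this API as follows:
 *
 *	struct page *pages[16];
 *
 *	if (alloc_xenballooned_pages(ARRAY_SIZE(pages), pages, false))
 *		return -ENOMEM;
 *	... map grant references or foreign frames into pages[] here ...
 *	free_xenballooned_pages(ARRAY_SIZE(pages), pages);
 */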

/**
 * free_xenballooned_pages - return pages retrieved with alloc_xenballooned_pages
 * @nr_pages: Number of pages
 * @pages: pages to return
 */
void free_xenballooned_pages(int nr_pages, struct page **pages)
{
	int i;

	mutex_lock(&balloon_mutex);

	for (i = 0; i < nr_pages; i++) {
		if (pages[i])
			balloon_append(pages[i]);
	}

	/* The balloon may be too large now. Shrink it if needed. */
	if (current_credit())
		schedule_delayed_work(&balloon_worker, 0);

	mutex_unlock(&balloon_mutex);
}
EXPORT_SYMBOL(free_xenballooned_pages);

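/*
 * Add the pages of one boot-time "extra memory" region to the balloon.
 * These PFNs have struct pages in the mem_map but are not backed by
 * machine frames until increase_reservation() populates them.
 */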
static void __init balloon_add_region(unsigned long start_pfn,
				      unsigned long pages)
{
	unsigned long pfn, extra_pfn_end;
	struct page *page;

	/*
	 * If the amount of usable memory has been limited (e.g., with
	 * the 'mem' command line parameter), don't add pages beyond
	 * this limit.
	 */
	extra_pfn_end = min(max_pfn, start_pfn + pages);

	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
		page = pfn_to_page(pfn);
		/*
		 * totalram_pages and totalhigh_pages do not include the
		 * boot-time balloon extension, so don't subtract from them.
		 */
		__balloon_append(page);
	}
}

static int __init balloon_init(void)
{
	int i;

	if (!xen_domain())
		return -ENODEV;

	pr_info("Initialising balloon driver\n");

	balloon_stats.current_pages = xen_pv_domain()
		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
		: get_num_physpages();
	balloon_stats.target_pages = balloon_stats.current_pages;
	balloon_stats.balloon_low = 0;
	balloon_stats.balloon_high = 0;

	balloon_stats.schedule_delay = 1;
	balloon_stats.max_schedule_delay = 32;
	balloon_stats.retry_count = 1;
	balloon_stats.max_retry_count = RETRY_UNLIMITED;

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
	balloon_stats.hotplug_pages = 0;
	balloon_stats.balloon_hotplug = 0;

	set_online_page_callback(&xen_online_page);
	register_memory_notifier(&xen_memory_nb);
#endif

	/*
	 * Initialize the balloon with pages from the extra memory
	 * regions (see arch/x86/xen/setup.c).
	 */
	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
		if (xen_extra_mem[i].size)
			balloon_add_region(PFN_UP(xen_extra_mem[i].start),
					   PFN_DOWN(xen_extra_mem[i].size));

	/* Init the xen-balloon (xenstore target) driver. */
	xen_balloon_init();

	return 0;
}
subsys_initcall(balloon_init);

MODULE_LICENSE("GPL");