1
2
3
4
5
6
7
8
9
#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>

#include "kvm_util.h"
#include "test_util.h"
#include "perf_test_util.h"
#include "guest_modes.h"
24
25#ifdef __NR_userfaultfd
26
27#ifdef PRINT_PER_PAGE_UPDATES
28#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
29#else
30#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
31#endif
32
33#ifdef PRINT_PER_VCPU_UPDATES
34#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
35#else
36#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
37#endif
38
/* Number of vCPUs to run; overridden by -v. */
static int nr_vcpus = 1;
/* Size of the demand-paged region per vCPU; overridden by -b. */
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
/* One host page filled with 0xAB, used as the UFFDIO_COPY source page. */
static char *guest_data_prototype;
42
43static void *vcpu_worker(void *data)
44{
45 int ret;
46 struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
47 int vcpu_id = vcpu_args->vcpu_id;
48 struct kvm_vm *vm = perf_test_args.vm;
49 struct kvm_run *run;
50 struct timespec start;
51 struct timespec ts_diff;
52
53 vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
54 run = vcpu_state(vm, vcpu_id);
55
56 clock_gettime(CLOCK_MONOTONIC, &start);
57
58
59 ret = _vcpu_run(vm, vcpu_id);
60 TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
61 if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
62 TEST_ASSERT(false,
63 "Invalid guest sync status: exit_reason=%s\n",
64 exit_reason_str(run->exit_reason));
65 }
66
67 ts_diff = timespec_elapsed(start);
68 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
69 ts_diff.tv_sec, ts_diff.tv_nsec);
70
71 return NULL;
72}
73
74static int handle_uffd_page_request(int uffd, uint64_t addr)
75{
76 pid_t tid;
77 struct timespec start;
78 struct timespec ts_diff;
79 struct uffdio_copy copy;
80 int r;
81
82 tid = syscall(__NR_gettid);
83
84 copy.src = (uint64_t)guest_data_prototype;
85 copy.dst = addr;
86 copy.len = perf_test_args.host_page_size;
87 copy.mode = 0;
88
89 clock_gettime(CLOCK_MONOTONIC, &start);
90
91 r = ioctl(uffd, UFFDIO_COPY, ©);
92 if (r == -1) {
93 pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
94 addr, tid, errno);
95 return r;
96 }
97
98 ts_diff = timespec_elapsed(start);
99
100 PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
101 timespec_to_ns(ts_diff));
102 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
103 perf_test_args.host_page_size, addr, tid);
104
105 return 0;
106}
107
/*
 * Global stop flag for the uffd handler threads.  NOTE(review): nothing in
 * this file ever sets it, so the handler threads actually exit via the pipe
 * notification; the flag looks vestigial — confirm before removing.
 */
bool quit_uffd_thread;

/* Arguments handed to each uffd_handler_thread_fn() instance. */
struct uffd_handler_args {
	int uffd;		/* userfaultfd to poll and service */
	int pipefd;		/* read end of the shutdown-signal pipe */
	useconds_t delay;	/* artificial per-fault delay in usec, 0 = none */
};
115
116static void *uffd_handler_thread_fn(void *arg)
117{
118 struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
119 int uffd = uffd_args->uffd;
120 int pipefd = uffd_args->pipefd;
121 useconds_t delay = uffd_args->delay;
122 int64_t pages = 0;
123 struct timespec start;
124 struct timespec ts_diff;
125
126 clock_gettime(CLOCK_MONOTONIC, &start);
127 while (!quit_uffd_thread) {
128 struct uffd_msg msg;
129 struct pollfd pollfd[2];
130 char tmp_chr;
131 int r;
132 uint64_t addr;
133
134 pollfd[0].fd = uffd;
135 pollfd[0].events = POLLIN;
136 pollfd[1].fd = pipefd;
137 pollfd[1].events = POLLIN;
138
139 r = poll(pollfd, 2, -1);
140 switch (r) {
141 case -1:
142 pr_info("poll err");
143 continue;
144 case 0:
145 continue;
146 case 1:
147 break;
148 default:
149 pr_info("Polling uffd returned %d", r);
150 return NULL;
151 }
152
153 if (pollfd[0].revents & POLLERR) {
154 pr_info("uffd revents has POLLERR");
155 return NULL;
156 }
157
158 if (pollfd[1].revents & POLLIN) {
159 r = read(pollfd[1].fd, &tmp_chr, 1);
160 TEST_ASSERT(r == 1,
161 "Error reading pipefd in UFFD thread\n");
162 return NULL;
163 }
164
165 if (!pollfd[0].revents & POLLIN)
166 continue;
167
168 r = read(uffd, &msg, sizeof(msg));
169 if (r == -1) {
170 if (errno == EAGAIN)
171 continue;
172 pr_info("Read of uffd gor errno %d", errno);
173 return NULL;
174 }
175
176 if (r != sizeof(msg)) {
177 pr_info("Read on uffd returned unexpected size: %d bytes", r);
178 return NULL;
179 }
180
181 if (!(msg.event & UFFD_EVENT_PAGEFAULT))
182 continue;
183
184 if (delay)
185 usleep(delay);
186 addr = msg.arg.pagefault.address;
187 r = handle_uffd_page_request(uffd, addr);
188 if (r < 0)
189 return NULL;
190 pages++;
191 }
192
193 ts_diff = timespec_elapsed(start);
194 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
195 pages, ts_diff.tv_sec, ts_diff.tv_nsec,
196 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
197
198 return NULL;
199}
200
201static int setup_demand_paging(struct kvm_vm *vm,
202 pthread_t *uffd_handler_thread, int pipefd,
203 useconds_t uffd_delay,
204 struct uffd_handler_args *uffd_args,
205 void *hva, uint64_t len)
206{
207 int uffd;
208 struct uffdio_api uffdio_api;
209 struct uffdio_register uffdio_register;
210
211 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
212 if (uffd == -1) {
213 pr_info("uffd creation failed\n");
214 return -1;
215 }
216
217 uffdio_api.api = UFFD_API;
218 uffdio_api.features = 0;
219 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
220 pr_info("ioctl uffdio_api failed\n");
221 return -1;
222 }
223
224 uffdio_register.range.start = (uint64_t)hva;
225 uffdio_register.range.len = len;
226 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
227 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
228 pr_info("ioctl uffdio_register failed\n");
229 return -1;
230 }
231
232 if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
233 UFFD_API_RANGE_IOCTLS) {
234 pr_info("unexpected userfaultfd ioctl set\n");
235 return -1;
236 }
237
238 uffd_args->uffd = uffd;
239 uffd_args->pipefd = pipefd;
240 uffd_args->delay = uffd_delay;
241 pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
242 uffd_args);
243
244 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
245 hva, hva + len);
246
247 return 0;
248}
249
/* Per-run parameters, filled from the command line in main(). */
struct test_params {
	bool use_uffd;		/* -u: service guest faults via userfaultfd */
	useconds_t uffd_delay;	/* -d: artificial per-fault delay in usec */
	bool partition_vcpu_memory_access;	/* cleared by -o: vCPUs share one region */
};
255
256static void run_test(enum vm_guest_mode mode, void *arg)
257{
258 struct test_params *p = arg;
259 pthread_t *vcpu_threads;
260 pthread_t *uffd_handler_threads = NULL;
261 struct uffd_handler_args *uffd_args = NULL;
262 struct timespec start;
263 struct timespec ts_diff;
264 int *pipefds = NULL;
265 struct kvm_vm *vm;
266 int vcpu_id;
267 int r;
268
269 vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
270 VM_MEM_SRC_ANONYMOUS);
271
272 perf_test_args.wr_fract = 1;
273
274 guest_data_prototype = malloc(perf_test_args.host_page_size);
275 TEST_ASSERT(guest_data_prototype,
276 "Failed to allocate buffer for guest data pattern");
277 memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
278
279 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
280 TEST_ASSERT(vcpu_threads, "Memory allocation failed");
281
282 perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
283 p->partition_vcpu_memory_access);
284
285 if (p->use_uffd) {
286 uffd_handler_threads =
287 malloc(nr_vcpus * sizeof(*uffd_handler_threads));
288 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
289
290 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
291 TEST_ASSERT(uffd_args, "Memory allocation failed");
292
293 pipefds = malloc(sizeof(int) * nr_vcpus * 2);
294 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
295
296 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
297 vm_paddr_t vcpu_gpa;
298 void *vcpu_hva;
299 uint64_t vcpu_mem_size;
300
301
302 if (p->partition_vcpu_memory_access) {
303 vcpu_gpa = guest_test_phys_mem +
304 (vcpu_id * guest_percpu_mem_size);
305 vcpu_mem_size = guest_percpu_mem_size;
306 } else {
307 vcpu_gpa = guest_test_phys_mem;
308 vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
309 }
310 PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
311 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
312
313
314 vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
315
316
317
318
319
320 r = pipe2(&pipefds[vcpu_id * 2],
321 O_CLOEXEC | O_NONBLOCK);
322 TEST_ASSERT(!r, "Failed to set up pipefd");
323
324 r = setup_demand_paging(vm,
325 &uffd_handler_threads[vcpu_id],
326 pipefds[vcpu_id * 2],
327 p->uffd_delay, &uffd_args[vcpu_id],
328 vcpu_hva, vcpu_mem_size);
329 if (r < 0)
330 exit(-r);
331 }
332 }
333
334
335 sync_global_to_guest(vm, perf_test_args);
336
337 pr_info("Finished creating vCPUs and starting uffd threads\n");
338
339 clock_gettime(CLOCK_MONOTONIC, &start);
340
341 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
342 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
343 &perf_test_args.vcpu_args[vcpu_id]);
344 }
345
346 pr_info("Started all vCPUs\n");
347
348
349 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
350 pthread_join(vcpu_threads[vcpu_id], NULL);
351 PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
352 }
353
354 ts_diff = timespec_elapsed(start);
355
356 pr_info("All vCPU threads joined\n");
357
358 if (p->use_uffd) {
359 char c;
360
361
362 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
363 r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
364 TEST_ASSERT(r == 1, "Unable to write to pipefd");
365
366 pthread_join(uffd_handler_threads[vcpu_id], NULL);
367 }
368 }
369
370 pr_info("Total guest execution time: %ld.%.9lds\n",
371 ts_diff.tv_sec, ts_diff.tv_nsec);
372 pr_info("Overall demand paging rate: %f pgs/sec\n",
373 perf_test_args.vcpu_args[0].pages * nr_vcpus /
374 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
375
376 perf_test_destroy_vm(vm);
377
378 free(guest_data_prototype);
379 free(vcpu_threads);
380 if (p->use_uffd) {
381 free(uffd_handler_threads);
382 free(uffd_args);
383 free(pipefds);
384 }
385}
386
/*
 * Print usage information for @name (argv[0]) and exit(0).  Called for -h
 * and for any unrecognized option.
 */
static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
	       " [-b memory] [-v vcpus] [-o]\n", name);
	guest_modes_help();
	printf(" -u: use User Fault FD to handle vCPU page\n"
	       " faults.\n");
	printf(" -d: add a delay in usec to the User Fault\n"
	       " FD handler to simulate demand paging\n"
	       " overheads. Ignored without -u.\n");
	printf(" -b: specify the size of the memory region which should be\n"
	       " demand paged by each vCPU. e.g. 10M or 3G.\n"
	       " Default: 1G\n");
	printf(" -v: specify the number of vCPUs to run.\n");
	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
	       " them into a separate region of memory for each vCPU.\n");
	puts("");
	exit(0);	/* never returns */
}
407
408int main(int argc, char *argv[])
409{
410 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
411 struct test_params p = {
412 .partition_vcpu_memory_access = true,
413 };
414 int opt;
415
416 guest_modes_append_default();
417
418 while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) {
419 switch (opt) {
420 case 'm':
421 guest_modes_cmdline(optarg);
422 break;
423 case 'u':
424 p.use_uffd = true;
425 break;
426 case 'd':
427 p.uffd_delay = strtoul(optarg, NULL, 0);
428 TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
429 break;
430 case 'b':
431 guest_percpu_mem_size = parse_size(optarg);
432 break;
433 case 'v':
434 nr_vcpus = atoi(optarg);
435 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
436 "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
437 break;
438 case 'o':
439 p.partition_vcpu_memory_access = false;
440 break;
441 case 'h':
442 default:
443 help(argv[0]);
444 break;
445 }
446 }
447
448 for_each_guest_mode(run_test, &p);
449
450 return 0;
451}
452
453#else
454
455#warning "missing __NR_userfaultfd definition"
456
/* Fallback build (no __NR_userfaultfd available): report the test skipped. */
int main(void)
{
	print_skip("__NR_userfaultfd must be present for userfaultfd test");
	return KSFT_SKIP;
}
462
463#endif
464