1
2
3
4
5
6
7
8
9
10
11
12
13#include "qemu/osdep.h"
14#include "qemu/bitops.h"
15#include "qemu/error-report.h"
16#include "qemu/userfaultfd.h"
17#include "trace.h"
18#include <poll.h>
19#include <sys/syscall.h>
20#include <sys/ioctl.h>
21#include <fcntl.h>
22
/*
 * How uffd_open() creates new userfaultfd descriptors.  The zero value is
 * the state of a zero-initialized static before any probing has happened.
 */
typedef enum {
    UFFD_UNINITIALIZED = 0, /* no probe has run yet */
    UFFD_USE_DEV_PATH = 1,  /* ioctl on an opened /dev/userfaultfd */
    UFFD_USE_SYSCALL = 2,   /* userfaultfd system call */
} uffd_open_mode;
28
29int uffd_open(int flags)
30{
31#if defined(__NR_userfaultfd)
32 static uffd_open_mode open_mode;
33 static int uffd_dev;
34
35
36 if (open_mode == UFFD_UNINITIALIZED) {
37
38
39
40
41
42 uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
43 if (uffd_dev >= 0) {
44 open_mode = UFFD_USE_DEV_PATH;
45 } else {
46
47 open_mode = UFFD_USE_SYSCALL;
48 }
49 trace_uffd_detect_open_mode(open_mode);
50 }
51
52 if (open_mode == UFFD_USE_DEV_PATH) {
53 assert(uffd_dev >= 0);
54 return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
55 }
56
57 return syscall(__NR_userfaultfd, flags);
58#else
59 return -EINVAL;
60#endif
61}
62
63
64
65
66
67
68
69
70int uffd_query_features(uint64_t *features)
71{
72 int uffd_fd;
73 struct uffdio_api api_struct = { 0 };
74 int ret = -1;
75
76 uffd_fd = uffd_open(O_CLOEXEC);
77 if (uffd_fd < 0) {
78 trace_uffd_query_features_nosys(errno);
79 return -1;
80 }
81
82 api_struct.api = UFFD_API;
83 api_struct.features = 0;
84
85 if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
86 trace_uffd_query_features_api_failed(errno);
87 goto out;
88 }
89 *features = api_struct.features;
90 ret = 0;
91
92out:
93 close(uffd_fd);
94 return ret;
95}
96
97
98
99
100
101
102
103
104
105int uffd_create_fd(uint64_t features, bool non_blocking)
106{
107 int uffd_fd;
108 int flags;
109 struct uffdio_api api_struct = { 0 };
110 uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
111
112 flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
113 uffd_fd = uffd_open(flags);
114 if (uffd_fd < 0) {
115 trace_uffd_create_fd_nosys(errno);
116 return -1;
117 }
118
119 api_struct.api = UFFD_API;
120 api_struct.features = features;
121 if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
122 trace_uffd_create_fd_api_failed(errno);
123 goto fail;
124 }
125 if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
126 trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
127 goto fail;
128 }
129
130 return uffd_fd;
131
132fail:
133 close(uffd_fd);
134 return -1;
135}
136
137
138
139
140
141
/**
 * uffd_close_fd: close a userfaultfd descriptor
 *
 * Asserts that the descriptor is non-negative, then closes it.  The
 * result of close() is not reported to the caller.
 *
 * @uffd_fd: descriptor to close; must be >= 0
 */
void uffd_close_fd(int uffd_fd)
{
    assert(!(uffd_fd < 0));
    (void) close(uffd_fd);
}
147
148
149
150
151
152
153
154
155
156
157
158
/**
 * uffd_register_memory: issue UFFDIO_REGISTER for a memory range
 *
 * Returns: 0 on success, -1 if the ioctl failed.
 *
 * @uffd_fd: userfaultfd descriptor the ioctl is issued on
 * @addr: start of the range
 * @length: length of the range in bytes
 * @mode: value placed in 'uffdio_register.mode'
 * @ioctls: if non-NULL, receives 'uffdio_register.ioctls' on success
 */
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
                         uint64_t mode, uint64_t *ioctls)
{
    struct uffdio_register reg = {
        .range = { .start = (uintptr_t) addr, .len = length },
        .mode = mode,
    };

    if (ioctl(uffd_fd, UFFDIO_REGISTER, &reg) != 0) {
        trace_uffd_register_memory_failed(addr, length, mode, errno);
        return -1;
    }

    /* The ioctl set is optional output; callers may pass NULL */
    if (ioctls != NULL) {
        *ioctls = reg.ioctls;
    }
    return 0;
}
178
179
180
181
182
183
184
185
186
187
/**
 * uffd_unregister_memory: issue UFFDIO_UNREGISTER for a memory range
 *
 * Returns: 0 on success, -1 if the ioctl failed.
 *
 * @uffd_fd: userfaultfd descriptor the ioctl is issued on
 * @addr: start of the range
 * @length: length of the range in bytes
 */
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };

    if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &range) != 0) {
        trace_uffd_unregister_memory_failed(addr, length, errno);
        return -1;
    }
    return 0;
}
202
203
204
205
206
207
208
209
210
211
212
213
/**
 * uffd_change_protection: issue UFFDIO_WRITEPROTECT for a memory range
 *
 * The ioctl mode is UFFDIO_WRITEPROTECT_MODE_WP when @wp is set.  When
 * @wp is clear it is UFFDIO_WRITEPROTECT_MODE_DONTWAKE if @dont_wake is
 * set and 0 otherwise; @dont_wake is ignored when @wp is set.
 *
 * Returns: 0 on success, -1 if the ioctl failed (an error is reported).
 *
 * @uffd_fd: userfaultfd descriptor the ioctl is issued on
 * @addr: start of the range
 * @length: length of the range in bytes
 * @wp: true to select write-protect mode
 * @dont_wake: true to select DONTWAKE mode; only honoured when !@wp
 */
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
                           bool wp, bool dont_wake)
{
    struct uffdio_writeprotect req;

    if (wp) {
        req.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    } else if (dont_wake) {
        req.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
    } else {
        req.mode = 0;
    }
    req.range.start = (uintptr_t) addr;
    req.range.len = length;

    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &req) != 0) {
        error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", addr, length,
                     (uint64_t) req.mode, errno);
        return -1;
    }
    return 0;
}
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
/**
 * uffd_copy_page: issue UFFDIO_COPY from a source to a destination range
 *
 * Returns: 0 on success, -1 if the ioctl failed (an error is reported).
 *
 * @uffd_fd: userfaultfd descriptor the ioctl is issued on
 * @dst_addr: value placed in 'uffdio_copy.dst'
 * @src_addr: value placed in 'uffdio_copy.src'
 * @length: number of bytes, placed in 'uffdio_copy.len'
 * @dont_wake: true to request UFFDIO_COPY_MODE_DONTWAKE, otherwise mode 0
 */
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
                   uint64_t length, bool dont_wake)
{
    struct uffdio_copy req = {
        .dst = (uintptr_t) dst_addr,
        .src = (uintptr_t) src_addr,
        .len = length,
        .mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0,
    };

    if (ioctl(uffd_fd, UFFDIO_COPY, &req) != 0) {
        error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
                     length, (uint64_t) req.mode, errno);
        return -1;
    }
    return 0;
}
271
272
273
274
275
276
277
278
279
280
281
282
283
/**
 * uffd_zero_page: issue UFFDIO_ZEROPAGE for a memory range
 *
 * Returns: 0 on success, -1 if the ioctl failed (an error is reported).
 *
 * @uffd_fd: userfaultfd descriptor the ioctl is issued on
 * @addr: start of the range
 * @length: length of the range in bytes
 * @dont_wake: true to request UFFDIO_ZEROPAGE_MODE_DONTWAKE, otherwise mode 0
 */
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
{
    struct uffdio_zeropage req = {
        .range = { .start = (uintptr_t) addr, .len = length },
        .mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0,
    };

    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &req) != 0) {
        error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", addr, length,
                     (uint64_t) req.mode, errno);
        return -1;
    }
    return 0;
}
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
/**
 * uffd_wakeup: issue UFFDIO_WAKE for a memory range
 *
 * Returns: 0 on success, -1 if the ioctl failed (an error is reported).
 *
 * @uffd_fd: userfaultfd descriptor the ioctl is issued on
 * @addr: start of the range
 * @length: length of the range in bytes
 */
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };

    if (ioctl(uffd_fd, UFFDIO_WAKE, &range) != 0) {
        error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
                     addr, length, errno);
        return -1;
    }
    return 0;
}
331
332
333
334
335
336
337
338
339
340
341
/**
 * uffd_read_events: read pending messages from a userfaultfd descriptor
 *
 * Calls read() for up to @count messages, retrying while it is
 * interrupted (EINTR).
 *
 * Returns: the number of whole 'struct uffd_msg' entries read, 0 if
 * read() failed with EAGAIN, or -1 on any other read error (an error is
 * reported).
 *
 * @uffd_fd: userfaultfd descriptor to read from
 * @msgs: buffer receiving the messages
 * @count: capacity of @msgs in messages
 */
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
{
    ssize_t nread;

    for (;;) {
        nread = read(uffd_fd, msgs, count * sizeof(struct uffd_msg));
        if (nread >= 0 || errno != EINTR) {
            break;
        }
    }

    if (nread >= 0) {
        return (int) (nread / sizeof(struct uffd_msg));
    }

    /* EAGAIN is reported as "no messages", not as an error */
    if (errno == EAGAIN) {
        return 0;
    }

    error_report("uffd_read_events() failed: errno=%i", errno);
    return -1;
}
359
360
361
362
363
364
365
366
367
/**
 * uffd_poll_events: wait for input on a userfaultfd descriptor
 *
 * Polls the descriptor for POLLIN, restarting the poll() call whenever it
 * is interrupted (EINTR).
 *
 * Returns: true if poll() reported POLLIN for the descriptor; false on
 * timeout, on a poll() error (which is reported), or if only other
 * events were returned.
 *
 * @uffd_fd: userfaultfd descriptor to poll
 * @tmo: timeout value passed unchanged to poll()
 */
bool uffd_poll_events(int uffd_fd, int tmo)
{
    struct pollfd pfd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
    int nready;

    while ((nready = poll(&pfd, 1, tmo)) < 0 && errno == EINTR) {
        /* interrupted: poll again */
    }

    if (nready < 0) {
        error_report("uffd_poll_events() failed: errno=%i", errno);
        return false;
    }

    return nready > 0 && (pfd.revents & POLLIN) != 0;
}
387