1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include "qemu/osdep.h"
27#include "qapi/error.h"
28#include "block/aio.h"
29#include "block/thread-pool.h"
30#include "block/graph-lock.h"
31#include "qemu/main-loop.h"
32#include "qemu/atomic.h"
33#include "qemu/rcu_queue.h"
34#include "block/raw-aio.h"
35#include "qemu/coroutine_int.h"
36#include "qemu/coroutine-tls.h"
37#include "sysemu/cpu-timers.h"
38#include "trace.h"
39
40
41
42
43
/* Bit values kept in QEMUBH.flags. */
enum {
    /* Set by aio_bh_enqueue(), cleared by aio_bh_dequeue() */
    BH_PENDING   = 1 << 0,

    /* The callback is to be invoked the next time the BH is dequeued */
    BH_SCHEDULED = 1 << 1,

    /* The BH is to be freed (and not run) the next time it is dequeued */
    BH_DELETED   = 1 << 2,

    /* The BH is freed by aio_bh_poll() right after it has been dequeued */
    BH_ONESHOT   = 1 << 3,

    /* Scheduled through qemu_bh_schedule_idle(); not counted as progress */
    BH_IDLE      = 1 << 4,
};
60
61struct QEMUBH {
62 AioContext *ctx;
63 const char *name;
64 QEMUBHFunc *cb;
65 void *opaque;
66 QSLIST_ENTRY(QEMUBH) next;
67 unsigned flags;
68 MemReentrancyGuard *reentrancy_guard;
69};
70
71
72static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
73{
74 AioContext *ctx = bh->ctx;
75 unsigned old_flags;
76
77
78
79
80
81 old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
82
83 if (!(old_flags & BH_PENDING)) {
84
85
86
87
88
89
90
91
92
93 QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
94 }
95
96 aio_notify(ctx);
97
98
99
100
101
102
103 icount_notify_exit();
104}
105
106
107static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
108{
109 QEMUBH *bh = QSLIST_FIRST_RCU(head);
110
111 if (!bh) {
112 return NULL;
113 }
114
115 QSLIST_REMOVE_HEAD(head, next);
116
117
118
119
120
121 *flags = qatomic_fetch_and(&bh->flags,
122 ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
123 return bh;
124}
125
126void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
127 void *opaque, const char *name)
128{
129 QEMUBH *bh;
130 bh = g_new(QEMUBH, 1);
131 *bh = (QEMUBH){
132 .ctx = ctx,
133 .cb = cb,
134 .opaque = opaque,
135 .name = name,
136 };
137 aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
138}
139
140QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
141 const char *name, MemReentrancyGuard *reentrancy_guard)
142{
143 QEMUBH *bh;
144 bh = g_new(QEMUBH, 1);
145 *bh = (QEMUBH){
146 .ctx = ctx,
147 .cb = cb,
148 .opaque = opaque,
149 .name = name,
150 .reentrancy_guard = reentrancy_guard,
151 };
152 return bh;
153}
154
155void aio_bh_call(QEMUBH *bh)
156{
157 bool last_engaged_in_io = false;
158
159
160 MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
161 if (reentrancy_guard) {
162 last_engaged_in_io = reentrancy_guard->engaged_in_io;
163 if (reentrancy_guard->engaged_in_io) {
164 trace_reentrant_aio(bh->ctx, bh->name);
165 }
166 reentrancy_guard->engaged_in_io = true;
167 }
168
169 bh->cb(bh->opaque);
170
171 if (reentrancy_guard) {
172 reentrancy_guard->engaged_in_io = last_engaged_in_io;
173 }
174}
175
176
177int aio_bh_poll(AioContext *ctx)
178{
179 BHListSlice slice;
180 BHListSlice *s;
181 int ret = 0;
182
183
184 QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
185
186
187
188
189
190
191#if !defined(__clang__)
192#pragma GCC diagnostic push
193#pragma GCC diagnostic ignored "-Wpragmas"
194#pragma GCC diagnostic ignored "-Wdangling-pointer="
195#endif
196 QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
197#if !defined(__clang__)
198#pragma GCC diagnostic pop
199#endif
200
201 while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
202 QEMUBH *bh;
203 unsigned flags;
204
205 bh = aio_bh_dequeue(&s->bh_list, &flags);
206 if (!bh) {
207 QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
208 continue;
209 }
210
211 if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
212
213 if (!(flags & BH_IDLE)) {
214 ret = 1;
215 }
216 aio_bh_call(bh);
217 }
218 if (flags & (BH_DELETED | BH_ONESHOT)) {
219 g_free(bh);
220 }
221 }
222
223 return ret;
224}
225
226void qemu_bh_schedule_idle(QEMUBH *bh)
227{
228 aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
229}
230
231void qemu_bh_schedule(QEMUBH *bh)
232{
233 aio_bh_enqueue(bh, BH_SCHEDULED);
234}
235
236
237
238void qemu_bh_cancel(QEMUBH *bh)
239{
240 qatomic_and(&bh->flags, ~BH_SCHEDULED);
241}
242
243
244
245
246void qemu_bh_delete(QEMUBH *bh)
247{
248 aio_bh_enqueue(bh, BH_DELETED);
249}
250
251static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
252{
253 QEMUBH *bh;
254
255 QSLIST_FOREACH_RCU(bh, head, next) {
256 if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
257 if (bh->flags & BH_IDLE) {
258
259
260 timeout = 10000000;
261 } else {
262
263
264 return 0;
265 }
266 }
267 }
268
269 return timeout;
270}
271
272int64_t
273aio_compute_timeout(AioContext *ctx)
274{
275 BHListSlice *s;
276 int64_t deadline;
277 int timeout = -1;
278
279 timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
280 if (timeout == 0) {
281 return 0;
282 }
283
284 QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
285 timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
286 if (timeout == 0) {
287 return 0;
288 }
289 }
290
291 deadline = timerlistgroup_deadline_ns(&ctx->tlg);
292 if (deadline == 0) {
293 return 0;
294 } else {
295 return qemu_soonest_timeout(timeout, deadline);
296 }
297}
298
299static gboolean
300aio_ctx_prepare(GSource *source, gint *timeout)
301{
302 AioContext *ctx = (AioContext *) source;
303
304 qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) | 1);
305
306
307
308
309
310
311 smp_mb();
312
313
314 *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
315
316 if (aio_prepare(ctx)) {
317 *timeout = 0;
318 }
319
320 return *timeout == 0;
321}
322
323static gboolean
324aio_ctx_check(GSource *source)
325{
326 AioContext *ctx = (AioContext *) source;
327 QEMUBH *bh;
328 BHListSlice *s;
329
330
331 qatomic_store_release(&ctx->notify_me, qatomic_read(&ctx->notify_me) & ~1);
332 aio_notify_accept(ctx);
333
334 QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
335 if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
336 return true;
337 }
338 }
339
340 QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
341 QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
342 if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
343 return true;
344 }
345 }
346 }
347 return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
348}
349
350static gboolean
351aio_ctx_dispatch(GSource *source,
352 GSourceFunc callback,
353 gpointer user_data)
354{
355 AioContext *ctx = (AioContext *) source;
356
357 assert(callback == NULL);
358 aio_dispatch(ctx);
359 return true;
360}
361
362static void
363aio_ctx_finalize(GSource *source)
364{
365 AioContext *ctx = (AioContext *) source;
366 QEMUBH *bh;
367 unsigned flags;
368
369 thread_pool_free(ctx->thread_pool);
370
371#ifdef CONFIG_LINUX_AIO
372 if (ctx->linux_aio) {
373 laio_detach_aio_context(ctx->linux_aio, ctx);
374 laio_cleanup(ctx->linux_aio);
375 ctx->linux_aio = NULL;
376 }
377#endif
378
379#ifdef CONFIG_LINUX_IO_URING
380 if (ctx->linux_io_uring) {
381 luring_detach_aio_context(ctx->linux_io_uring, ctx);
382 luring_cleanup(ctx->linux_io_uring);
383 ctx->linux_io_uring = NULL;
384 }
385#endif
386
387 assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
388 qemu_bh_delete(ctx->co_schedule_bh);
389
390
391 assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
392
393 while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
394
395
396
397
398
399
400
401
402
403 if (unlikely(!(flags & BH_DELETED))) {
404 fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
405 __func__, bh->name);
406 abort();
407 }
408
409 g_free(bh);
410 }
411
412 aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
413 event_notifier_cleanup(&ctx->notifier);
414 qemu_rec_mutex_destroy(&ctx->lock);
415 qemu_lockcnt_destroy(&ctx->list_lock);
416 timerlistgroup_deinit(&ctx->tlg);
417 unregister_aiocontext(ctx);
418 aio_context_destroy(ctx);
419}
420
421static GSourceFuncs aio_source_funcs = {
422 aio_ctx_prepare,
423 aio_ctx_check,
424 aio_ctx_dispatch,
425 aio_ctx_finalize
426};
427
428GSource *aio_get_g_source(AioContext *ctx)
429{
430 aio_context_use_g_source(ctx);
431 g_source_ref(&ctx->source);
432 return &ctx->source;
433}
434
435ThreadPool *aio_get_thread_pool(AioContext *ctx)
436{
437 if (!ctx->thread_pool) {
438 ctx->thread_pool = thread_pool_new(ctx);
439 }
440 return ctx->thread_pool;
441}
442
443#ifdef CONFIG_LINUX_AIO
444LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
445{
446 if (!ctx->linux_aio) {
447 ctx->linux_aio = laio_init(errp);
448 if (ctx->linux_aio) {
449 laio_attach_aio_context(ctx->linux_aio, ctx);
450 }
451 }
452 return ctx->linux_aio;
453}
454
455LinuxAioState *aio_get_linux_aio(AioContext *ctx)
456{
457 assert(ctx->linux_aio);
458 return ctx->linux_aio;
459}
460#endif
461
462#ifdef CONFIG_LINUX_IO_URING
463LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
464{
465 if (ctx->linux_io_uring) {
466 return ctx->linux_io_uring;
467 }
468
469 ctx->linux_io_uring = luring_init(errp);
470 if (!ctx->linux_io_uring) {
471 return NULL;
472 }
473
474 luring_attach_aio_context(ctx->linux_io_uring, ctx);
475 return ctx->linux_io_uring;
476}
477
478LuringState *aio_get_linux_io_uring(AioContext *ctx)
479{
480 assert(ctx->linux_io_uring);
481 return ctx->linux_io_uring;
482}
483#endif
484
485void aio_notify(AioContext *ctx)
486{
487
488
489
490
491 smp_wmb();
492 qatomic_set(&ctx->notified, true);
493
494
495
496
497
498 smp_mb();
499 if (qatomic_read(&ctx->notify_me)) {
500 event_notifier_set(&ctx->notifier);
501 }
502}
503
504void aio_notify_accept(AioContext *ctx)
505{
506 qatomic_set(&ctx->notified, false);
507
508
509
510
511
512
513 smp_mb();
514}
515
516static void aio_timerlist_notify(void *opaque, QEMUClockType type)
517{
518 aio_notify(opaque);
519}
520
521static void aio_context_notifier_cb(EventNotifier *e)
522{
523 AioContext *ctx = container_of(e, AioContext, notifier);
524
525 event_notifier_test_and_clear(&ctx->notifier);
526}
527
528
529static bool aio_context_notifier_poll(void *opaque)
530{
531 EventNotifier *e = opaque;
532 AioContext *ctx = container_of(e, AioContext, notifier);
533
534
535
536
537
538
539 return qatomic_read(&ctx->notified);
540}
541
542static void aio_context_notifier_poll_ready(EventNotifier *e)
543{
544
545}
546
547static void co_schedule_bh_cb(void *opaque)
548{
549 AioContext *ctx = opaque;
550 QSLIST_HEAD(, Coroutine) straight, reversed;
551
552 QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
553 QSLIST_INIT(&straight);
554
555 while (!QSLIST_EMPTY(&reversed)) {
556 Coroutine *co = QSLIST_FIRST(&reversed);
557 QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
558 QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
559 }
560
561 while (!QSLIST_EMPTY(&straight)) {
562 Coroutine *co = QSLIST_FIRST(&straight);
563 QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
564 trace_aio_co_schedule_bh_cb(ctx, co);
565 aio_context_acquire(ctx);
566
567
568 qatomic_set(&co->scheduled, NULL);
569 qemu_aio_coroutine_enter(ctx, co);
570 aio_context_release(ctx);
571 }
572}
573
574AioContext *aio_context_new(Error **errp)
575{
576 int ret;
577 AioContext *ctx;
578
579 ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
580 QSLIST_INIT(&ctx->bh_list);
581 QSIMPLEQ_INIT(&ctx->bh_slice_list);
582 aio_context_setup(ctx);
583
584 ret = event_notifier_init(&ctx->notifier, false);
585 if (ret < 0) {
586 error_setg_errno(errp, -ret, "Failed to initialize event notifier");
587 goto fail;
588 }
589 g_source_set_can_recurse(&ctx->source, true);
590 qemu_lockcnt_init(&ctx->list_lock);
591
592 ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
593 QSLIST_INIT(&ctx->scheduled_coroutines);
594
595 aio_set_event_notifier(ctx, &ctx->notifier,
596 aio_context_notifier_cb,
597 aio_context_notifier_poll,
598 aio_context_notifier_poll_ready);
599#ifdef CONFIG_LINUX_AIO
600 ctx->linux_aio = NULL;
601#endif
602
603#ifdef CONFIG_LINUX_IO_URING
604 ctx->linux_io_uring = NULL;
605#endif
606
607 ctx->thread_pool = NULL;
608 qemu_rec_mutex_init(&ctx->lock);
609 timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
610
611 ctx->poll_ns = 0;
612 ctx->poll_max_ns = 0;
613 ctx->poll_grow = 0;
614 ctx->poll_shrink = 0;
615
616 ctx->aio_max_batch = 0;
617
618 ctx->thread_pool_min = 0;
619 ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
620
621 register_aiocontext(ctx);
622
623 return ctx;
624fail:
625 g_source_destroy(&ctx->source);
626 return NULL;
627}
628
629void aio_co_schedule(AioContext *ctx, Coroutine *co)
630{
631 trace_aio_co_schedule(ctx, co);
632 const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
633 __func__);
634
635 if (scheduled) {
636 fprintf(stderr,
637 "%s: Co-routine was already scheduled in '%s'\n",
638 __func__, scheduled);
639 abort();
640 }
641
642
643
644
645
646 aio_context_ref(ctx);
647
648 QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
649 co, co_scheduled_next);
650 qemu_bh_schedule(ctx->co_schedule_bh);
651
652 aio_context_unref(ctx);
653}
654
655typedef struct AioCoRescheduleSelf {
656 Coroutine *co;
657 AioContext *new_ctx;
658} AioCoRescheduleSelf;
659
660static void aio_co_reschedule_self_bh(void *opaque)
661{
662 AioCoRescheduleSelf *data = opaque;
663 aio_co_schedule(data->new_ctx, data->co);
664}
665
666void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
667{
668 AioContext *old_ctx = qemu_get_current_aio_context();
669
670 if (old_ctx != new_ctx) {
671 AioCoRescheduleSelf data = {
672 .co = qemu_coroutine_self(),
673 .new_ctx = new_ctx,
674 };
675
676
677
678
679
680 aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
681 qemu_coroutine_yield();
682 }
683}
684
685void aio_co_wake(Coroutine *co)
686{
687 AioContext *ctx;
688
689
690
691
692 smp_read_barrier_depends();
693 ctx = qatomic_read(&co->ctx);
694
695 aio_co_enter(ctx, co);
696}
697
698void aio_co_enter(AioContext *ctx, Coroutine *co)
699{
700 if (ctx != qemu_get_current_aio_context()) {
701 aio_co_schedule(ctx, co);
702 return;
703 }
704
705 if (qemu_in_coroutine()) {
706 Coroutine *self = qemu_coroutine_self();
707 assert(self != co);
708 QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
709 } else {
710 aio_context_acquire(ctx);
711 qemu_aio_coroutine_enter(ctx, co);
712 aio_context_release(ctx);
713 }
714}
715
716void aio_context_ref(AioContext *ctx)
717{
718 g_source_ref(&ctx->source);
719}
720
721void aio_context_unref(AioContext *ctx)
722{
723 g_source_unref(&ctx->source);
724}
725
726void aio_context_acquire(AioContext *ctx)
727{
728 qemu_rec_mutex_lock(&ctx->lock);
729}
730
731void aio_context_release(AioContext *ctx)
732{
733 qemu_rec_mutex_unlock(&ctx->lock);
734}
735
736QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
737
738AioContext *qemu_get_current_aio_context(void)
739{
740 AioContext *ctx = get_my_aiocontext();
741 if (ctx) {
742 return ctx;
743 }
744 if (qemu_mutex_iothread_locked()) {
745
746 return qemu_get_aio_context();
747 }
748 return NULL;
749}
750
751void qemu_set_current_aio_context(AioContext *ctx)
752{
753 assert(!get_my_aiocontext());
754 set_my_aiocontext(ctx);
755}
756
757void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
758 int64_t max, Error **errp)
759{
760
761 if (min > max || !max || min > INT_MAX || max > INT_MAX) {
762 error_setg(errp, "bad thread-pool-min/thread-pool-max values");
763 return;
764 }
765
766 ctx->thread_pool_min = min;
767 ctx->thread_pool_max = max;
768
769 if (ctx->thread_pool) {
770 thread_pool_update_params(ctx->thread_pool, ctx);
771 }
772}
773