From dd765099b7c289d532f665ffb81bf24f58dab78c Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Wed, 24 Jan 2024 13:23:34 +0100 Subject: [PATCH] [DONTMERGE] Fix crash on scsi unplug When scsi_device_for_each_req_async_bh() runs and calls blk_get_aio_context(), we may be in the middle of a bdrv_try_change_aio_context() in a different thread. Therefore, the BDS's and BB's AioContext may differ, which causes an assertion failure in blk_get_aio_context(). The fact that bdrv_try_change_aio_context() doesn't take the graph write lock and that blk_get_io_context() doesn't require/take a read lock seems problematic in general, and this patch doesn't fix that general problem. Instead, it just fixes the very specific interaction on scsi unplug to allow testing repeated hot-plugging/-unplugging without crashing. Signed-off-by: Hanna Czenczek --- block/block-backend.c | 2 ++ hw/scsi/scsi-bus.c | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 209eb07528..28e21c5b72 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2451,7 +2451,9 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, old_allow_change = blk->allow_aio_context_change; blk->allow_aio_context_change = true; + bdrv_graph_wrlock(); ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp); + bdrv_graph_wrunlock(); blk->allow_aio_context_change = old_allow_change; diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 0a2eb11c56..0a079c4ed7 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -111,7 +111,7 @@ typedef struct { void *fn_opaque; } SCSIDeviceForEachReqAsyncData; -static void scsi_device_for_each_req_async_bh(void *opaque) +static void coroutine_fn scsi_device_for_each_req_async_bh(void *opaque) { g_autofree SCSIDeviceForEachReqAsyncData *data = opaque; SCSIDevice *s = data->s; @@ -125,16 +125,21 @@ static void scsi_device_for_each_req_async_bh(void *opaque) * scsi_device_for_each_req_async() is called and then the AioContext is * changed before BHs are run. */ + bdrv_graph_co_rdlock(); ctx = blk_get_aio_context(s->conf.blk); if (ctx != qemu_get_current_aio_context()) { - aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, - g_steal_pointer(&data)); + bdrv_graph_co_rdunlock(); + Coroutine *co; + co = qemu_coroutine_create(scsi_device_for_each_req_async_bh, + g_steal_pointer(&data)); + aio_co_enter(ctx, co); return; } QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { data->fn(req, data->fn_opaque); } + bdrv_graph_co_rdunlock(); /* Drop the reference taken by scsi_device_for_each_req_async() */ object_unref(OBJECT(s)); @@ -150,6 +155,8 @@ static void scsi_device_for_each_req_async(SCSIDevice *s, { assert(qemu_in_main_thread()); + Coroutine *co; + AioContext *ctx; SCSIDeviceForEachReqAsyncData *data = g_new(SCSIDeviceForEachReqAsyncData, 1); @@ -163,9 +170,11 @@ static void scsi_device_for_each_req_async(SCSIDevice *s, */ object_ref(OBJECT(s)); - aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), - scsi_device_for_each_req_async_bh, - data); + co = qemu_coroutine_create(scsi_device_for_each_req_async_bh, data); + bdrv_graph_rdlock_main_loop(); + ctx = blk_get_aio_context(s->conf.blk); + bdrv_graph_rdunlock_main_loop(); + aio_co_enter(ctx, co); } static void scsi_device_realize(SCSIDevice *s, Error **errp) -- 2.43.0