aboutsummaryrefslogtreecommitdiffstats
path: root/fs/io_uring.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c90
1 files changed, 68 insertions, 22 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index de9c9de90655..2e04f718319d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req)
return atomic_dec_and_test(&req->refs);
}
-static inline void req_ref_put(struct io_kiocb *req)
-{
- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
- WARN_ON_ONCE(req_ref_put_and_test(req));
-}
-
static inline void req_ref_get(struct io_kiocb *req)
{
WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
@@ -5468,12 +5462,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
{
- struct wait_queue_head *head = poll->head;
+ struct wait_queue_head *head = smp_load_acquire(&poll->head);
- spin_lock_irq(&head->lock);
- list_del_init(&poll->wait.entry);
- poll->head = NULL;
- spin_unlock_irq(&head->lock);
+ if (head) {
+ spin_lock_irq(&head->lock);
+ list_del_init(&poll->wait.entry);
+ poll->head = NULL;
+ spin_unlock_irq(&head->lock);
+ }
}
static void io_poll_remove_entries(struct io_kiocb *req)
@@ -5481,10 +5477,26 @@ static void io_poll_remove_entries(struct io_kiocb *req)
struct io_poll_iocb *poll = io_poll_get_single(req);
struct io_poll_iocb *poll_double = io_poll_get_double(req);
- if (poll->head)
- io_poll_remove_entry(poll);
- if (poll_double && poll_double->head)
+ /*
+ * While we hold the waitqueue lock and the waitqueue is nonempty,
+ * wake_up_pollfree() will wait for us. However, taking the waitqueue
+ * lock in the first place can race with the waitqueue being freed.
+ *
+ * We solve this as eventpoll does: by taking advantage of the fact that
+ * all users of wake_up_pollfree() will RCU-delay the actual free. If
+ * we enter rcu_read_lock() and see that the pointer to the queue is
+ * non-NULL, we can then lock it without the memory being freed out from
+ * under us.
+ *
+ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+ * case the caller deletes the entry from the queue, leaving it empty.
+ * In that case, only RCU prevents the queue memory from being freed.
+ */
+ rcu_read_lock();
+ io_poll_remove_entry(poll);
+ if (poll_double)
io_poll_remove_entry(poll_double);
+ rcu_read_unlock();
}
/*
@@ -5624,6 +5636,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
wait);
__poll_t mask = key_to_poll(key);
+ if (unlikely(mask & POLLFREE)) {
+ io_poll_mark_cancelled(req);
+ /* we have to kick tw in case it's not already */
+ io_poll_execute(req, 0);
+
+ /*
+ * If the waitqueue is being freed early but someone is already
+ * holds ownership over it, we have to tear down the request as
+ * best we can. That means immediately removing the request from
+ * its waitqueue and preventing all further accesses to the
+ * waitqueue via the request.
+ */
+ list_del_init(&poll->wait.entry);
+
+ /*
+ * Careful: this *must* be the last step, since as soon
+ * as req->head is NULL'ed out, the request can be
+ * completed and freed, since aio_poll_complete_work()
+ * will no longer need to take the waitqueue lock.
+ */
+ smp_store_release(&poll->head, NULL);
+ return 1;
+ }
+
/* for instances that support it check for an event match first */
if (mask && !(mask & poll->events))
return 0;
@@ -6350,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- if (ret != -ENOENT)
- return ret;
+ /*
+ * Fall-through even for -EALREADY, as we may have poll armed
+ * that need unarming.
+ */
+ if (!ret)
+ return 0;
spin_lock(&ctx->completion_lock);
+ ret = io_poll_cancel(ctx, sqe_addr, false);
+ if (ret != -ENOENT)
+ goto out;
+
spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, sqe_addr);
spin_unlock_irq(&ctx->timeout_lock);
- if (ret != -ENOENT)
- goto out;
- ret = io_poll_cancel(ctx, sqe_addr, false);
out:
spin_unlock(&ctx->completion_lock);
return ret;
@@ -7781,10 +7822,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
unsigned long flags;
bool first_add = false;
+ unsigned long delay = HZ;
spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
node->done = true;
+ /* if we are mid-quiesce then do not delay */
+ if (node->rsrc_data->quiesce)
+ delay = 0;
+
while (!list_empty(&ctx->rsrc_ref_list)) {
node = list_first_entry(&ctx->rsrc_ref_list,
struct io_rsrc_node, node);
@@ -7797,10 +7843,10 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
if (first_add)
- mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
+ mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
+static struct io_rsrc_node *io_rsrc_node_alloc(void)
{
struct io_rsrc_node *ref_node;
@@ -7851,7 +7897,7 @@ static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
{
if (ctx->rsrc_backup_node)
return 0;
- ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
+ ctx->rsrc_backup_node = io_rsrc_node_alloc();
return ctx->rsrc_backup_node ? 0 : -ENOMEM;
}