diff options
Diffstat (limited to 'io_uring/poll.c')
-rw-r--r-- | io_uring/poll.c | 90 |
1 files changed, 71 insertions, 19 deletions
diff --git a/io_uring/poll.c b/io_uring/poll.c index 0d9f49c575e0..d9bf1767867e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -40,7 +40,14 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK GENMASK(30, 0) +#define IO_POLL_RETRY_FLAG BIT(30) +#define IO_POLL_REF_MASK GENMASK(29, 0) + +/* + * We usually have 1-2 refs taken, 128 is more than enough and we want to + * maximise the margin between this amount and the moment when it overflows. + */ +#define IO_POLL_REF_BIAS 128 #define IO_WQE_F_DOUBLE 1 @@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) return priv & IO_WQE_F_DOUBLE; } +static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) +{ + int v; + + /* + * poll_refs are already elevated and we don't have much hope for + * grabbing the ownership. Instead of incrementing set a retry flag + * to notify the loop that there might have been some change. + */ + v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); + if (v & IO_POLL_REF_MASK) + return false; + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can * bump it and acquire ownership. It's disallowed to modify requests while not @@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) */ static inline bool io_poll_get_ownership(struct io_kiocb *req) { + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } @@ -116,6 +140,8 @@ static void io_poll_req_insert_locked(struct io_kiocb *req) struct io_hash_table *table = &req->ctx->cancel_table_locked; u32 index = hash_long(req->cqe.user_data, table->hash_bits); + lockdep_assert_held(&req->ctx->uring_lock); + hlist_add_head(&req->hash_node, &table->hbs[index].list); } @@ -226,6 +252,23 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_DONE; if (v & IO_POLL_CANCEL_FLAG) return -ECANCELED; + /* + * cqe.res contains only events of the first wake up + * and all others are be lost. Redo vfs_poll() to get + * up to date state. + */ + if ((v & IO_POLL_REF_MASK) != 1) + req->cqe.res = 0; + if (v & IO_POLL_RETRY_FLAG) { + req->cqe.res = 0; + /* + * We won't find new events that came in between + * vfs_poll and the ref put unless we clear the flag + * in advance. + */ + atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); + v &= ~IO_POLL_RETRY_FLAG; + } /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { @@ -237,6 +280,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; + if (io_is_uring_fops(req->file)) + return IOU_POLL_DONE; /* multishot, just fill a CQE and proceed */ if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { @@ -256,11 +301,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return ret; } + /* force the next iteration to vfs_poll() */ + req->cqe.res = 0; + /* * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & + IO_POLL_REF_MASK); return IOU_POLL_NO_ACTION; } @@ -394,7 +443,8 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, return 1; } -static void io_poll_double_prepare(struct io_kiocb *req) +/* fails only when polling is already completing by the first entry */ +static bool io_poll_double_prepare(struct io_kiocb *req) { struct wait_queue_head *head; struct io_poll *poll = io_poll_get_single(req); @@ -403,20 +453,20 @@ static void io_poll_double_prepare(struct io_kiocb *req) rcu_read_lock(); head = smp_load_acquire(&poll->head); /* - * poll arm may not hold ownership and so race with - * io_poll_wake() by modifying req->flags. There is only one - * poll entry queued, serialise with it by taking its head lock. + * poll arm might not hold ownership and so race for req->flags with + * io_poll_wake(). There is only one poll entry queued, serialise with + * it by taking its head lock. As we're still arming the tw hanlder + * is not going to be run, so there are no races with it. */ - if (head) + if (head) { spin_lock_irq(&head->lock); - - req->flags |= REQ_F_DOUBLE_POLL; - if (req->opcode == IORING_OP_POLL_ADD) - req->flags |= REQ_F_ASYNC_DATA; - - if (head) + req->flags |= REQ_F_DOUBLE_POLL; + if (req->opcode == IORING_OP_POLL_ADD) + req->flags |= REQ_F_ASYNC_DATA; spin_unlock_irq(&head->lock); + } rcu_read_unlock(); + return !!head; } static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, @@ -454,7 +504,11 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, /* mark as double wq entry */ wqe_private |= IO_WQE_F_DOUBLE; io_init_poll_iocb(poll, first->events, first->wait.func); - io_poll_double_prepare(req); + if (!io_poll_double_prepare(req)) { + /* the request is completing, just back off */ + kfree(poll); + return; + } *poll_ptr = poll; } else { /* fine to modify, there is no poll queued to race with us */ @@ -499,7 +553,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; - int v; INIT_HLIST_NODE(&req->hash_node); req->work.cancel_seq = atomic_read(&ctx->cancel_seq); @@ -567,11 +620,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (ipt->owning) { /* - * Release ownership. If someone tried to queue a tw while it was - * locked, kick it off for them. + * Try to release ownership. If we see a change of state, e.g. + * poll was waken up, queue up a tw, it'll deal with it. */ - v = atomic_dec_return(&req->poll_refs); - if (unlikely(v & IO_POLL_REF_MASK)) + if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) __io_poll_execute(req, 0); } return 0; |