aboutsummaryrefslogtreecommitdiffstats
path: root/io_uring
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring')
-rw-r--r--io_uring/io_uring.c23
-rw-r--r--io_uring/io_uring.h17
-rw-r--r--io_uring/kbuf.c2
-rw-r--r--io_uring/net.c23
-rw-r--r--io_uring/poll.c43
5 files changed, 69 insertions, 39 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6cc16e39b27f..8840cf3e20f2 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -176,6 +176,11 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
}
+static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
+{
+ return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
+}
+
static bool io_match_linked(struct io_kiocb *head)
{
struct io_kiocb *req;
@@ -1173,7 +1178,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
}
}
-int __io_run_local_work(struct io_ring_ctx *ctx, bool locked)
+int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked)
{
struct llist_node *node;
struct llist_node fake;
@@ -1192,7 +1197,7 @@ again:
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
prefetch(container_of(next, struct io_kiocb, io_task_work.node));
- req->io_task_work.func(req, &locked);
+ req->io_task_work.func(req, locked);
ret++;
node = next;
}
@@ -1208,7 +1213,7 @@ again:
goto again;
}
- if (locked)
+ if (*locked)
io_submit_flush_completions(ctx);
trace_io_uring_local_work_run(ctx, ret, loops);
return ret;
@@ -1225,7 +1230,7 @@ int io_run_local_work(struct io_ring_ctx *ctx)
__set_current_state(TASK_RUNNING);
locked = mutex_trylock(&ctx->uring_lock);
- ret = __io_run_local_work(ctx, locked);
+ ret = __io_run_local_work(ctx, &locked);
if (locked)
mutex_unlock(&ctx->uring_lock);
@@ -1446,8 +1451,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
io_task_work_pending(ctx)) {
u32 tail = ctx->cached_cq_tail;
- if (!llist_empty(&ctx->work_llist))
- __io_run_local_work(ctx, true);
+ (void) io_run_local_work_locked(ctx);
if (task_work_pending(current) ||
wq_list_empty(&ctx->iopoll_list)) {
@@ -1764,7 +1768,7 @@ int io_poll_issue(struct io_kiocb *req, bool *locked)
io_tw_lock(req->ctx, locked);
if (unlikely(req->task->flags & PF_EXITING))
return -EFAULT;
- return io_issue_sqe(req, IO_URING_F_NONBLOCK);
+ return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT);
}
struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
@@ -2316,7 +2320,7 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
static inline bool io_should_wake(struct io_wait_queue *iowq)
{
struct io_ring_ctx *ctx = iowq->ctx;
- int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
+ int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
/*
* Wake up if we have enough events, or if a timeout occurred since we
@@ -2400,7 +2404,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
io_cqring_overflow_flush(ctx);
- if (io_cqring_events(ctx) >= min_events)
+ /* if user messes with these they will just get an early return */
+ if (__io_cqring_events_user(ctx) >= min_events)
return 0;
} while (ret > 0);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index ef77d2aa3172..cef5ff924e63 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -17,8 +17,8 @@ enum {
IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED,
/*
- * Intended only when both REQ_F_POLLED and REQ_F_APOLL_MULTISHOT
- * are set to indicate to the poll runner that multishot should be
+ * Intended only when both IO_URING_F_MULTISHOT is passed
+ * to indicate to the poll runner that multishot should be
* removed and the result is set on req->cqe.res.
*/
IOU_STOP_MULTISHOT = -ECANCELED,
@@ -27,7 +27,7 @@ enum {
struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow);
bool io_req_cqe_overflow(struct io_kiocb *req);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
-int __io_run_local_work(struct io_ring_ctx *ctx, bool locked);
+int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
int io_run_local_work(struct io_ring_ctx *ctx);
void io_req_complete_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
@@ -277,9 +277,18 @@ static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
{
+ bool locked;
+ int ret;
+
if (llist_empty(&ctx->work_llist))
return 0;
- return __io_run_local_work(ctx, true);
+
+ locked = true;
+ ret = __io_run_local_work(ctx, &locked);
+ /* shouldn't happen! */
+ if (WARN_ON_ONCE(!locked))
+ mutex_lock(&ctx->uring_lock);
+ return ret;
}
static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 25cd724ade18..e2c46889d5fa 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -346,6 +346,8 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
tmp = READ_ONCE(sqe->off);
if (tmp > USHRT_MAX)
return -E2BIG;
+ if (tmp + p->nbufs >= USHRT_MAX)
+ return -EINVAL;
p->bid = tmp;
return 0;
}
diff --git a/io_uring/net.c b/io_uring/net.c
index 15dea91625e2..ab83da7e80f0 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -67,8 +67,6 @@ struct io_sr_msg {
struct io_kiocb *notif;
};
-#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
-
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
@@ -591,7 +589,8 @@ static inline void io_recv_prep_retry(struct io_kiocb *req)
* again (for multishot).
*/
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
- unsigned int cflags, bool mshot_finished)
+ unsigned int cflags, bool mshot_finished,
+ unsigned issue_flags)
{
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
io_req_set_res(req, *ret, cflags);
@@ -614,7 +613,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
io_req_set_res(req, *ret, cflags);
- if (req->flags & REQ_F_POLLED)
+ if (issue_flags & IO_URING_F_MULTISHOT)
*ret = IOU_STOP_MULTISHOT;
else
*ret = IOU_OK;
@@ -773,8 +772,7 @@ retry_multishot:
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) {
ret = io_setup_async_msg(req, kmsg, issue_flags);
- if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) ==
- IO_APOLL_MULTI_POLLED) {
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
io_kbuf_recycle(req, issue_flags);
return IOU_ISSUE_SKIP_COMPLETE;
}
@@ -803,7 +801,7 @@ retry_multishot:
if (kmsg->msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- if (!io_recv_finish(req, &ret, cflags, mshot_finished))
+ if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
goto retry_multishot;
if (mshot_finished) {
@@ -869,7 +867,7 @@ retry_multishot:
ret = sock_recvmsg(sock, &msg, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) {
- if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) {
+ if (issue_flags & IO_URING_F_MULTISHOT) {
io_kbuf_recycle(req, issue_flags);
return IOU_ISSUE_SKIP_COMPLETE;
}
@@ -902,7 +900,7 @@ out_free:
if (msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- if (!io_recv_finish(req, &ret, cflags, ret <= 0))
+ if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
goto retry_multishot;
return ret;
@@ -1289,8 +1287,7 @@ retry:
* return EAGAIN to arm the poll infra since it
* has already been done
*/
- if ((req->flags & IO_APOLL_MULTI_POLLED) ==
- IO_APOLL_MULTI_POLLED)
+ if (issue_flags & IO_URING_F_MULTISHOT)
ret = IOU_ISSUE_SKIP_COMPLETE;
return ret;
}
@@ -1315,9 +1312,7 @@ retry:
goto retry;
io_req_set_res(req, ret, 0);
- if (req->flags & REQ_F_POLLED)
- return IOU_STOP_MULTISHOT;
- return IOU_OK;
+ return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK;
}
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 0d9f49c575e0..055632e9092a 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -116,6 +116,8 @@ static void io_poll_req_insert_locked(struct io_kiocb *req)
struct io_hash_table *table = &req->ctx->cancel_table_locked;
u32 index = hash_long(req->cqe.user_data, table->hash_bits);
+ lockdep_assert_held(&req->ctx->uring_lock);
+
hlist_add_head(&req->hash_node, &table->hbs[index].list);
}
@@ -226,6 +228,13 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
return IOU_POLL_DONE;
if (v & IO_POLL_CANCEL_FLAG)
return -ECANCELED;
+ /*
+ * cqe.res contains only events of the first wake up
+ * and all others are be lost. Redo vfs_poll() to get
+ * up to date state.
+ */
+ if ((v & IO_POLL_REF_MASK) != 1)
+ req->cqe.res = 0;
/* the mask was stashed in __io_poll_execute */
if (!req->cqe.res) {
@@ -237,6 +246,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
continue;
if (req->apoll_events & EPOLLONESHOT)
return IOU_POLL_DONE;
+ if (io_is_uring_fops(req->file))
+ return IOU_POLL_DONE;
/* multishot, just fill a CQE and proceed */
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
@@ -256,6 +267,9 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
return ret;
}
+ /* force the next iteration to vfs_poll() */
+ req->cqe.res = 0;
+
/*
* Release all references, retry if someone tried to restart
* task_work while we were executing it.
@@ -394,7 +408,8 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
return 1;
}
-static void io_poll_double_prepare(struct io_kiocb *req)
+/* fails only when polling is already completing by the first entry */
+static bool io_poll_double_prepare(struct io_kiocb *req)
{
struct wait_queue_head *head;
struct io_poll *poll = io_poll_get_single(req);
@@ -403,20 +418,20 @@ static void io_poll_double_prepare(struct io_kiocb *req)
rcu_read_lock();
head = smp_load_acquire(&poll->head);
/*
- * poll arm may not hold ownership and so race with
- * io_poll_wake() by modifying req->flags. There is only one
- * poll entry queued, serialise with it by taking its head lock.
+ * poll arm might not hold ownership and so race for req->flags with
+ * io_poll_wake(). There is only one poll entry queued, serialise with
+ * it by taking its head lock. As we're still arming the tw hanlder
+ * is not going to be run, so there are no races with it.
*/
- if (head)
+ if (head) {
spin_lock_irq(&head->lock);
-
- req->flags |= REQ_F_DOUBLE_POLL;
- if (req->opcode == IORING_OP_POLL_ADD)
- req->flags |= REQ_F_ASYNC_DATA;
-
- if (head)
+ req->flags |= REQ_F_DOUBLE_POLL;
+ if (req->opcode == IORING_OP_POLL_ADD)
+ req->flags |= REQ_F_ASYNC_DATA;
spin_unlock_irq(&head->lock);
+ }
rcu_read_unlock();
+ return !!head;
}
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
@@ -454,7 +469,11 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
/* mark as double wq entry */
wqe_private |= IO_WQE_F_DOUBLE;
io_init_poll_iocb(poll, first->events, first->wait.func);
- io_poll_double_prepare(req);
+ if (!io_poll_double_prepare(req)) {
+ /* the request is completing, just back off */
+ kfree(poll);
+ return;
+ }
*poll_ptr = poll;
} else {
/* fine to modify, there is no poll queued to race with us */