pipe: remove pipe_wait() and fix wakeup race with splice

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 2 Oct 2020 02:14:36 +0000 (19:14 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 2 Oct 2020 02:14:36 +0000 (19:14 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Oct 2020 02:14:36 +0000 (19:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Oct 2020 02:14:36 +0000 (19:14 -0700)
diff --git a/fs/pipe.c b/fs/pipe.c

index 60dbee457143674cc01651e03f213f53776aeaa1..117db82b10af515bbbfbbac4c9a88b0d88669e91 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -106,25 +106,6 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
         }
  }
  
-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe)
-{
-       DEFINE_WAIT(rdwait);
-       DEFINE_WAIT(wrwait);
-
-       /*
-        * Pipes are system-local resources, so sleeping on them
-        * is considered a noninteractive wait:
-        */
-       prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
-       prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
-       pipe_unlock(pipe);
-       schedule();
-       finish_wait(&pipe->rd_wait, &rdwait);
-       finish_wait(&pipe->wr_wait, &wrwait);
-       pipe_lock(pipe);
-}
-
  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                                   struct pipe_buffer *buf)
  {
@@ -1035,12 +1016,52 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
         return do_pipe2(fildes, 0);
  }
  
+/*
+ * This is the stupid "wait for pipe to be readable or writable"
+ * model.
+ *
+ * See pipe_read/write() for the proper kind of exclusive wait,
+ * but that requires that we wake up any other readers/writers
+ * if we then do not end up reading everything (ie the whole
+ * "wake_next_reader/writer" logic in pipe_read/write()).
+ */
+void pipe_wait_readable(struct pipe_inode_info *pipe)
+{
+       pipe_unlock(pipe);
+       wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
+       pipe_lock(pipe);
+}
+
+void pipe_wait_writable(struct pipe_inode_info *pipe)
+{
+       pipe_unlock(pipe);
+       wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
+       pipe_lock(pipe);
+}
+
+/*
+ * This depends on both the wait (here) and the wakeup (wake_up_partner)
+ * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
+ * race with the count check and waitqueue prep.
+ *
+ * Normally in order to avoid races, you'd do the prepare_to_wait() first,
+ * then check the condition you're waiting for, and only then sleep. But
+ * because of the pipe lock, we can check the condition before being on
+ * the wait queue.
+ *
+ * We use the 'rd_wait' waitqueue for pipe partner waiting.
+ */
  static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
  {
+       DEFINE_WAIT(rdwait);
         int cur = *cnt;
  
         while (cur == *cnt) {
-               pipe_wait(pipe);
+               prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+               pipe_unlock(pipe);
+               schedule();
+               finish_wait(&pipe->rd_wait, &rdwait);
+               pipe_lock(pipe);
                 if (signal_pending(current))
                         break;
         }
@@ -1050,7 +1071,6 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
  static void wake_up_partner(struct pipe_inode_info *pipe)
  {
         wake_up_interruptible_all(&pipe->rd_wait);
-       wake_up_interruptible_all(&pipe->wr_wait);
  }
  
  static int fifo_open(struct inode *inode, struct file *filp)
diff --git a/fs/splice.c b/fs/splice.c

index d7c8a7c4db07fff8cfc54a83adefba53207691a0..c3d00dfc7344648be62ac41244b0708ec72148f6 100644 (file)
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -563,7 +563,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
                         sd->need_wakeup = false;
                 }
  
-               pipe_wait(pipe);
+               pipe_wait_readable(pipe);
         }
  
         return 1;
@@ -1077,7 +1077,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
                         return -EAGAIN;
                 if (signal_pending(current))
                         return -ERESTARTSYS;
-               pipe_wait(pipe);
+               pipe_wait_writable(pipe);
         }
  }
  
@@ -1454,7 +1454,7 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
                         ret = -EAGAIN;
                         break;
                 }
-               pipe_wait(pipe);
+               pipe_wait_readable(pipe);
         }
  
         pipe_unlock(pipe);
@@ -1493,7 +1493,7 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
                         ret = -ERESTARTSYS;
                         break;
                 }
-               pipe_wait(pipe);
+               pipe_wait_writable(pipe);
         }
  
         pipe_unlock(pipe);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h

index 50afd0d0084caf38fa314004c453a8d9a6695dc0..5d2705f1d01c3d7c2574b0a705aecf0ba27ddedf 100644 (file)
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -240,8 +240,9 @@ extern unsigned int pipe_max_size;
  extern unsigned long pipe_user_pages_hard;
  extern unsigned long pipe_user_pages_soft;
  
-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe);
+/* Wait for a pipe to be readable/writable while dropping the pipe lock */
+void pipe_wait_readable(struct pipe_inode_info *);
+void pipe_wait_writable(struct pipe_inode_info *);
  
  struct pipe_inode_info *alloc_pipe_info(void);
  void free_pipe_info(struct pipe_inode_info *);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 2 Oct 2020 02:14:36 +0000 (19:14 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 2 Oct 2020 02:14:36 +0000 (19:14 -0700)
fs/pipe.c		patch | blob | history
fs/splice.c		patch | blob | history
include/linux/pipe_fs_i.h		patch | blob | history