]> git.baikalelectronics.ru Git - kernel.git/commitdiff
block: introduce block_rq_error tracepoint
authorYang Shi <shy828301@gmail.com>
Thu, 10 Feb 2022 22:52:22 +0000 (14:52 -0800)
committerJens Axboe <axboe@kernel.dk>
Fri, 11 Feb 2022 17:00:16 +0000 (10:00 -0700)
Currently, rasdaemon uses the existing tracepoint block_rq_complete
and filters out non-error cases in order to capture block disk errors.

But there are a few problems with this approach:

1. Even kernel trace filter could do the filtering work, there is
   still some overhead after we enable this tracepoint.

2. The filter is merely based on errno, which does not align with kernel
   logic to check the errors for print_req_error().

3. block_rq_complete only provides dev major and minor to identify
   the block device, it is not convenient to use in user-space.

So introduce a new tracepoint block_rq_error just for the error case.
With this patch, rasdaemon could switch to block_rq_error.

Since the new tracepoint has the similar implementation with
block_rq_complete, so move the existing code from TRACE_EVENT
block_rq_complete() into new event class block_rq_completion(). Then add
event for block_rq_complete and block_rq_err respectively from the newly
created event class per the suggestion from Chaitanya Kulkarni.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Yang Shi <shy828301@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220210225222.260069-1-shy828301@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq.c
include/trace/events/block.h

index 4b868e792ba4ac0c40cd7e9a897ec5ea76e4bb2d..6c59ffe765fde9f0dac44edeb64ef8653d108a0d 100644 (file)
@@ -789,8 +789,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
 #endif
 
        if (unlikely(error && !blk_rq_is_passthrough(req) &&
-                    !(req->rq_flags & RQF_QUIET)))
+                    !(req->rq_flags & RQF_QUIET))) {
                blk_print_req_error(req, error);
+               trace_block_rq_error(req, error, nr_bytes);
+       }
 
        blk_account_io_completion(req, nr_bytes);
 
index 27170e40e8c95d05498f9443cd0bb926d1f368b6..7f4dfbdf12a6f1c531c145e59f5311ea83f9e419 100644 (file)
@@ -100,19 +100,7 @@ TRACE_EVENT(block_rq_requeue,
                  __entry->nr_sector, 0)
 );
 
-/**
- * block_rq_complete - block IO operation completed by device driver
- * @rq: block operations request
- * @error: status code
- * @nr_bytes: number of completed bytes
- *
- * The block_rq_complete tracepoint event indicates that some portion
- * of operation request has been completed by the device driver.  If
- * the @rq->bio is %NULL, then there is absolutely no additional work to
- * do for the request. If @rq->bio is non-NULL then there is
- * additional work required to complete the request.
- */
-TRACE_EVENT(block_rq_complete,
+DECLARE_EVENT_CLASS(block_rq_completion,
 
        TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
 
@@ -144,6 +132,41 @@ TRACE_EVENT(block_rq_complete,
                  __entry->nr_sector, __entry->error)
 );
 
+/**
+ * block_rq_complete - block IO operation completed by device driver
+ * @rq: block operations request
+ * @error: status code
+ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of operation request has been completed by the device driver.  If
+ * the @rq->bio is %NULL, then there is absolutely no additional work to
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
+DEFINE_EVENT(block_rq_completion, block_rq_complete,
+
+       TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
+
+       TP_ARGS(rq, error, nr_bytes)
+);
+
+/**
+ * block_rq_error - block IO operation error reported by device driver
+ * @rq: block operations request
+ * @error: status code
+ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_error tracepoint event indicates that some portion
+ * of operation request has failed as reported by the device driver.
+ */
+DEFINE_EVENT(block_rq_completion, block_rq_error,
+
+       TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
+
+       TP_ARGS(rq, error, nr_bytes)
+);
+
 DECLARE_EVENT_CLASS(block_rq,
 
        TP_PROTO(struct request *rq),