out_err:
dip->dio_bio->bi_status = status;
btrfs_dio_private_put(dip);
- return BLK_QC_T_NONE;
}
- const struct iomap_ops btrfs_dio_iomap_ops = {
- .iomap_begin = btrfs_dio_iomap_begin,
- .iomap_end = btrfs_dio_iomap_end,
- };
+ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
+ const struct iov_iter *iter, loff_t offset)
+ {
+ int seg;
+ int i;
+ unsigned int blocksize_mask = fs_info->sectorsize - 1;
+ ssize_t retval = -EINVAL;
- const struct iomap_dio_ops btrfs_dops = {
- .submit_io = btrfs_submit_direct,
- };
+ if (offset & blocksize_mask)
+ goto out;
+
+ if (iov_iter_alignment(iter) & blocksize_mask)
+ goto out;
+
+ /* If this is a write we don't need to check anymore */
+ if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
+ return 0;
+ /*
+ * Check to make sure we don't have duplicate iov_base's in this
+ * iovec, if so return EINVAL, otherwise we'll get csum errors
+ * when reading back.
+ */
+ for (seg = 0; seg < iter->nr_segs; seg++) {
+ for (i = seg + 1; i < iter->nr_segs; i++) {
+ if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
+ goto out;
+ }
+ }
+ retval = 0;
+ out:
+ return retval;
+ }
+
+ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+ {
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_dio_data dio_data = { 0 };
+ struct extent_changeset *data_reserved = NULL;
+ loff_t offset = iocb->ki_pos;
+ size_t count = 0;
+ int flags = 0;
+ bool wakeup = true;
+ bool relock = false;
+ ssize_t ret;
+
+ if (check_direct_IO(fs_info, iter, offset))
+ return 0;
+
+ inode_dio_begin(inode);
+
+ /*
+ * The generic stuff only does filemap_write_and_wait_range, which
+ * isn't enough if we've written compressed pages to this area, so
+ * we need to flush the dirty pages again to make absolutely sure
+ * that any outstanding dirty pages are on disk.
+ */
+ count = iov_iter_count(iter);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ filemap_fdatawrite_range(inode->i_mapping, offset,
+ offset + count - 1);
+
+ if (iov_iter_rw(iter) == WRITE) {
+ /*
+ * If the write DIO is beyond the EOF, we need update
+ * the isize, but it is protected by i_mutex. So we can
+ * not unlock the i_mutex at this case.
+ */
+ if (offset + count <= inode->i_size) {
+ dio_data.overwrite = 1;
+ inode_unlock(inode);
+ relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ offset, count);
+ if (ret)
+ goto out;
+
+ /*
+ * We need to know how many extents we reserved so that we can
+ * do the accounting properly if we go over the number we
+ * originally calculated. Abuse current->journal_info for this.
+ */
+ dio_data.reserve = round_up(count,
+ fs_info->sectorsize);
+ dio_data.unsubmitted_oe_range_start = (u64)offset;
+ dio_data.unsubmitted_oe_range_end = (u64)offset;
+ current->journal_info = &dio_data;
+ down_read(&BTRFS_I(inode)->dio_sem);
+ } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
+ &BTRFS_I(inode)->runtime_flags)) {
+ inode_dio_end(inode);
+ flags = DIO_LOCKING | DIO_SKIP_HOLES;
+ wakeup = false;
+ }
+
+ ret = __blockdev_direct_IO(iocb, inode,
+ fs_info->fs_devices->latest_bdev,
+ iter, btrfs_get_blocks_direct, NULL,
+ btrfs_submit_direct, flags);
+ if (iov_iter_rw(iter) == WRITE) {
+ up_read(&BTRFS_I(inode)->dio_sem);
+ current->journal_info = NULL;
+ if (ret < 0 && ret != -EIOCBQUEUED) {
+ if (dio_data.reserve)
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, dio_data.reserve, true);
+ /*
+ * On error we might have left some ordered extents
+ * without submitting corresponding bios for them, so
+ * cleanup them up to avoid other tasks getting them
+ * and waiting for them to complete forever.
+ */
+ if (dio_data.unsubmitted_oe_range_start <
+ dio_data.unsubmitted_oe_range_end)
+ __endio_write_update_ordered(inode,
+ dio_data.unsubmitted_oe_range_start,
+ dio_data.unsubmitted_oe_range_end -
+ dio_data.unsubmitted_oe_range_start,
+ false);
+ } else if (ret >= 0 && (size_t)ret < count)
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, count - (size_t)ret, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count);
+ }
+ out:
+ if (wakeup)
+ inode_dio_end(inode);
+ if (relock)
+ inode_lock(inode);
+
+ extent_changeset_free(data_reserved);
+ return ret;
+ }
-#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
-
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
.readpage = btrfs_readpage,
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
+ .readahead = btrfs_readahead,
- .direct_IO = noop_direct_IO,
+ .direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
#ifdef CONFIG_MIGRATION