#define ALL_CTR_FLAGS (CTR_FLAG_OPTIONS_NO_ARGS | \
CTR_FLAG_OPTIONS_ONE_ARG)
-/*
- * All flags which cause a recovery unfreeze once they got stored in the raid metadata
- */
-#define ALL_FREEZE_FLAGS (ALL_CTR_FLAGS & ~(CTR_FLAG_REGION_SIZE | CTR_FLAGS_ANY_SYNC | \
- CTR_FLAG_RAID10_FORMAT | CTR_FLAG_RAID10_COPIES | \
- CTR_FLAG_RAID10_USE_NEAR_SETS))
-
/* Invalid options definitions per raid level... */
/* "raid0" does not accept any options */
return NULL;
}
+/*
+ * Set the mddev properties in @rs to the current
+ * ones retrieved from the freshest superblock
+ */
+static void rs_set_cur(struct raid_set *rs)
+{
+ struct mddev *mddev = &rs->md;
+
+ mddev->new_level = mddev->level;
+ mddev->new_layout = mddev->layout;
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
+}
+
/*
* Set the mddev properties in @rs to the new
* ones requested by the ctr
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
+ mddev->raid_disks = rs->raid_disks;
mddev->delta_disks = 0;
}
rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
}
rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
- list_add(&rs->dev[i].rdev.same_set, &rs->md.disks);
+ list_add_tail(&rs->dev[i].rdev.same_set, &rs->md.disks);
if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
rebuild++;
}
return rs_check_for_invalid_flags(rs);
}
+/* Return # of data stripes as kept in mddev as of @rs (i.e. as of superblock) */
+static unsigned int mddev_data_stripes(struct raid_set *rs)
+{
+ return rs->md.raid_disks - rs->raid_type->parity_devs;
+}
+
static void do_table_event(struct work_struct *ws)
{
struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
} else {
/*
- * Reshaping is not allowed, because we don't have the appropriate metadata
+ * No takeover/reshaping, because we don't have the extended v1.8.0 metadata
*/
if (le32_to_cpu(sb->level) != mddev->level) {
DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)");
struct mddev *mddev = &rs->md;
struct dm_raid_superblock *sb;
- if (!rdev->sb_page)
+ if (rs_is_raid0(rs) || !rdev->sb_page)
return 0;
sb = page_address(rdev->sb_page);
rdev->new_data_offset = new_data_offset;
}
- rs_set_new(rs);
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
-
return 0;
}
if (r)
return r;
+ /* Tell preresume to update superblocks with new layout */
_set_flag(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
- }
+ rs_set_new(rs);
+ } else
+ rs_set_cur(rs);
/* Start raid set read-only and assumed clean to change in raid_resume() */
rs->md.ro = 1;
return DM_MAPIO_SUBMITTED;
}
+/* Return string describing the current sync action of @mddev */
static const char *decipher_sync_action(struct mddev *mddev)
{
if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
return "idle";
}
-static void raid_status(struct dm_target *ti, status_type_t type,
- unsigned status_flags, char *result, unsigned maxlen)
+/*
+ * Return status string @rdev
+ *
+ * Status characters:
+ *
+ * 'D' = Dead/Failed device
+ * 'a' = Alive but not in-sync
+ * 'A' = Alive and in-sync
+ */
+static const char *_raid_dev_status(struct md_rdev *rdev, bool array_in_sync)
{
- struct raid_set *rs = ti->private;
- unsigned raid_param_cnt = 1; /* at least 1 for chunksize */
- unsigned sz = 0;
- int i, array_in_sync = 0;
- sector_t sync;
+ if (test_bit(Faulty, &rdev->flags))
+ return "D";
+ else if (!array_in_sync || !test_bit(In_sync, &rdev->flags))
+ return "a";
+ else
+ return "A";
+}
- switch (type) {
- case STATUSTYPE_INFO:
- DMEMIT("%s %d ", rs->raid_type->name, rs->md.raid_disks);
+/* Helper to return resync/reshape progress for @rs and @array_in_sync */
+static sector_t rs_get_progress(struct raid_set *rs,
+ sector_t resync_max_sectors, bool *array_in_sync)
+{
+ sector_t r, recovery_cp, curr_resync_completed;
+ struct mddev *mddev = &rs->md;
- if (!rt_is_raid0(rs->raid_type)) {
- if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery))
- sync = rs->md.curr_resync_completed;
- else
- sync = rs->md.recovery_cp;
-
- if (sync >= rs->md.resync_max_sectors) {
- /*
- * Sync complete.
- */
- array_in_sync = 1;
- sync = rs->md.resync_max_sectors;
- } else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
- /*
- * If "check" or "repair" is occurring, the array has
- * undergone and initial sync and the health characters
- * should not be 'a' anymore.
- */
- array_in_sync = 1;
+ curr_resync_completed = mddev->curr_resync_completed ?: mddev->recovery_cp;
+ recovery_cp = mddev->recovery_cp;
+ *array_in_sync = false;
+
+ if (rs_is_raid0(rs)) {
+ r = resync_max_sectors;
+ *array_in_sync = true;
+
+ } else {
+ r = mddev->reshape_position;
+
+ /* Reshape is relative to the array size */
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
+ r != MaxSector) {
+ if (r == MaxSector) {
+ *array_in_sync = true;
+ r = resync_max_sectors;
} else {
- /*
- * The array may be doing an initial sync, or it may
- * be rebuilding individual components. If all the
- * devices are In_sync, then it is the array that is
- * being initialized.
- */
- for (i = 0; i < rs->md.raid_disks; i++)
- if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
- array_in_sync = 1;
+ /* Got to reverse on backward reshape */
+ if (mddev->reshape_backwards)
+ r = mddev->array_sectors - r;
+
+ /* Devide by # of data stripes */
+ sector_div(r, mddev_data_stripes(rs));
}
+
+ /* Sync is relative to the component device size */
+ } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ r = curr_resync_completed;
+ else
+ r = recovery_cp;
+
+ if (r == MaxSector) {
+ /*
+ * Sync complete.
+ */
+ *array_in_sync = true;
+ r = resync_max_sectors;
+ } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ /*
+ * If "check" or "repair" is occurring, the raid set has
+ * undergone an initial sync and the health characters
+ * should not be 'a' anymore.
+ */
+ *array_in_sync = true;
} else {
- /* RAID0 */
- array_in_sync = 1;
- sync = rs->md.resync_max_sectors;
- }
+ struct md_rdev *rdev;
- /*
- * Status characters:
- * 'D' = Dead/Failed device
- * 'a' = Alive but not in-sync
- * 'A' = Alive and in-sync
- */
- for (i = 0; i < rs->md.raid_disks; i++) {
- if (test_bit(Faulty, &rs->dev[i].rdev.flags))
- DMEMIT("D");
- else if (!array_in_sync ||
- !test_bit(In_sync, &rs->dev[i].rdev.flags))
- DMEMIT("a");
- else
- DMEMIT("A");
+ /*
+ * The raid set may be doing an initial sync, or it may
+ * be rebuilding individual components. If all the
+ * devices are In_sync, then it is the raid set that is
+ * being initialized.
+ */
+ rdev_for_each(rdev, mddev)
+ if (!test_bit(In_sync, &rdev->flags))
+ *array_in_sync = true;
+#if 0
+ r = 0; /* HM FIXME: TESTME: https://bugzilla.redhat.com/show_bug.cgi?id=1210637 ? */
+#endif
}
+ }
+
+ return r;
+}
+
+/* Helper to return @dev name or "-" if !@dev */
+static const char *_get_dev_name(struct dm_dev *dev)
+{
+ return dev ? dev->name : "-";
+}
+
+static void raid_status(struct dm_target *ti, status_type_t type,
+ unsigned int status_flags, char *result, unsigned int maxlen)
+{
+ struct raid_set *rs = ti->private;
+ struct mddev *mddev = &rs->md;
+ struct r5conf *conf = mddev->private;
+ int max_nr_stripes = conf ? conf->max_nr_stripes : 0;
+ bool array_in_sync;
+ unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
+ unsigned int sz = 0;
+ unsigned int write_mostly_params = 0;
+ sector_t progress, resync_max_sectors, resync_mismatches;
+ const char *sync_action;
+ struct raid_type *rt;
+ struct md_rdev *rdev;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ /* *Should* always succeed */
+ rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
+ if (!rt)
+ return;
+
+ DMEMIT("%s %d ", rt ? rt->name : "unknown", mddev->raid_disks);
+
+ /* Access most recent mddev properties for status output */
+ smp_rmb();
+ /* Get sensible max sectors even if raid set not yet started */
+ resync_max_sectors = _test_flag(RT_FLAG_RS_PRERESUMED, rs->runtime_flags) ?
+ mddev->resync_max_sectors : mddev->dev_sectors;
+ progress = rs_get_progress(rs, resync_max_sectors, &array_in_sync);
+ resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
+ (unsigned int) atomic64_read(&mddev->resync_mismatches) : 0;
+ sync_action = decipher_sync_action(&rs->md);
+
+ /* HM FIXME: do we want another state char for raid0? It shows 'D' or 'A' now */
+ rdev_for_each(rdev, mddev)
+ DMEMIT(_raid_dev_status(rdev, array_in_sync));
/*
- * In-sync ratio:
+ * In-sync/Reshape ratio:
* The in-sync ratio shows the progress of:
- * - Initializing the array
- * - Rebuilding a subset of devices of the array
+ * - Initializing the raid set
+ * - Rebuilding a subset of devices of the raid set
* The user can distinguish between the two by referring
* to the status characters.
+ *
+ * The reshape ratio shows the progress of
+ * changing the raid layout or the number of
+ * disks of a raid set
*/
- DMEMIT(" %llu/%llu",
- (unsigned long long) sync,
- (unsigned long long) rs->md.resync_max_sectors);
+ DMEMIT(" %llu/%llu", (unsigned long long) progress,
+ (unsigned long long) resync_max_sectors);
/*
+ * v1.5.0+:
+ *
* Sync action:
- * See Documentation/device-mapper/dm-raid.c for
+ * See Documentation/device-mapper/dm-raid.txt for
* information on each of these states.
*/
- DMEMIT(" %s", decipher_sync_action(&rs->md));
+ DMEMIT(" %s", sync_action);
/*
+ * v1.5.0+:
+ *
* resync_mismatches/mismatch_cnt
* This field shows the number of discrepancies found when
- * performing a "check" of the array.
+ * performing a "check" of the raid set.
*/
- DMEMIT(" %llu",
- (strcmp(rs->md.last_sync_action, "check")) ? 0 :
- (unsigned long long)
- atomic64_read(&rs->md.resync_mismatches));
- break;
- case STATUSTYPE_TABLE:
- /* The string you would use to construct this array */
- for (i = 0; i < rs->md.raid_disks; i++) {
- if (_test_flag(CTR_FLAG_REBUILD, rs->ctr_flags) &&
- rs->dev[i].data_dev &&
- !test_bit(In_sync, &rs->dev[i].rdev.flags))
- raid_param_cnt += 2; /* for rebuilds */
- if (rs->dev[i].data_dev &&
- test_bit(WriteMostly, &rs->dev[i].rdev.flags))
- raid_param_cnt += 2;
- }
-
- raid_param_cnt += (hweight32(rs->ctr_flags & ~CTR_FLAG_REBUILD) * 2);
- if (rs->ctr_flags & (CTR_FLAG_SYNC | CTR_FLAG_NOSYNC))
- raid_param_cnt--;
+ DMEMIT(" %llu", (unsigned long long) resync_mismatches);
- DMEMIT("%s %u %u", rs->raid_type->name,
- raid_param_cnt, rs->md.chunk_sectors);
+ /*
+ * v1.8.0+:
+ *
+ * data_offset (needed for out of space reshaping)
+ * This field shows the data offset into the data
+ * image LV where the first stripes data starts.
+ *
+ * We keep data_offset equal on all raid disks of the set,
+ * so retrieving it from the first raid disk is sufficient.
+ */
+ DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset);
+ break;
- if (_test_flag(CTR_FLAG_SYNC, rs->ctr_flags) &&
- rs->md.recovery_cp == MaxSector)
- DMEMIT(" sync");
+ case STATUSTYPE_TABLE:
+ /* Report the table line string you would use to construct this raid set */
+
+ /* Calculate raid parameter count */
+ rdev_for_each(rdev, mddev)
+ if (test_bit(WriteMostly, &rdev->flags))
+ write_mostly_params += 2;
+ raid_param_cnt += memweight(rs->rebuild_disks,
+ DISKS_ARRAY_ELEMS * sizeof(*rs->rebuild_disks)) * 2 +
+ write_mostly_params +
+ hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
+ hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2;
+ /* Emit table line */
+ DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
+ if (_test_flag(CTR_FLAG_RAID10_FORMAT, rs->ctr_flags))
+ DMEMIT(" %s %s", _argname_by_flag(CTR_FLAG_RAID10_FORMAT),
+ raid10_md_layout_to_format(mddev->layout));
+ if (_test_flag(CTR_FLAG_RAID10_COPIES, rs->ctr_flags))
+ DMEMIT(" %s %d", _argname_by_flag(CTR_FLAG_RAID10_COPIES),
+ raid10_md_layout_to_copies(mddev->layout));
if (_test_flag(CTR_FLAG_NOSYNC, rs->ctr_flags))
- DMEMIT(" nosync");
-
- for (i = 0; i < rs->md.raid_disks; i++)
- if (_test_flag(CTR_FLAG_REBUILD, rs->ctr_flags) &&
- rs->dev[i].data_dev &&
- !test_bit(In_sync, &rs->dev[i].rdev.flags))
- DMEMIT(" rebuild %u", i);
-
+ DMEMIT(" %s", _argname_by_flag(CTR_FLAG_NOSYNC));
+ if (_test_flag(CTR_FLAG_SYNC, rs->ctr_flags))
+ DMEMIT(" %s", _argname_by_flag(CTR_FLAG_SYNC));
+ if (_test_flag(CTR_FLAG_REGION_SIZE, rs->ctr_flags))
+ DMEMIT(" %s %llu", _argname_by_flag(CTR_FLAG_REGION_SIZE),
+ (unsigned long long) to_sector(mddev->bitmap_info.chunksize));
+ if (_test_flag(CTR_FLAG_DATA_OFFSET, rs->ctr_flags))
+ DMEMIT(" %s %llu", _argname_by_flag(CTR_FLAG_DATA_OFFSET),
+ (unsigned long long) rs->data_offset);
if (_test_flag(CTR_FLAG_DAEMON_SLEEP, rs->ctr_flags))
- DMEMIT(" daemon_sleep %lu",
- rs->md.bitmap_info.daemon_sleep);
-
- if (_test_flag(CTR_FLAG_MIN_RECOVERY_RATE, rs->ctr_flags))
- DMEMIT(" min_recovery_rate %d", rs->md.sync_speed_min);
-
- if (_test_flag(CTR_FLAG_MAX_RECOVERY_RATE, rs->ctr_flags))
- DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max);
-
- for (i = 0; i < rs->md.raid_disks; i++)
- if (rs->dev[i].data_dev &&
- test_bit(WriteMostly, &rs->dev[i].rdev.flags))
- DMEMIT(" write_mostly %u", i);
-
+ DMEMIT(" %s %lu", _argname_by_flag(CTR_FLAG_DAEMON_SLEEP),
+ mddev->bitmap_info.daemon_sleep);
+ if (_test_flag(CTR_FLAG_DELTA_DISKS, rs->ctr_flags))
+ DMEMIT(" %s %d", _argname_by_flag(CTR_FLAG_DELTA_DISKS),
+ mddev->delta_disks);
+ if (_test_flag(CTR_FLAG_STRIPE_CACHE, rs->ctr_flags))
+ DMEMIT(" %s %d", _argname_by_flag(CTR_FLAG_STRIPE_CACHE),
+ max_nr_stripes);
+ rdev_for_each(rdev, mddev)
+ if (test_bit(rdev->raid_disk, (void *) rs->rebuild_disks))
+ DMEMIT(" %s %u", _argname_by_flag(CTR_FLAG_REBUILD),
+ rdev->raid_disk);
+ rdev_for_each(rdev, mddev)
+ if (test_bit(WriteMostly, &rdev->flags))
+ DMEMIT(" %s %d", _argname_by_flag(CTR_FLAG_WRITE_MOSTLY),
+ rdev->raid_disk);
if (_test_flag(CTR_FLAG_MAX_WRITE_BEHIND, rs->ctr_flags))
- DMEMIT(" max_write_behind %lu",
- rs->md.bitmap_info.max_write_behind);
-
- if (_test_flag(CTR_FLAG_STRIPE_CACHE, rs->ctr_flags)) {
- struct r5conf *conf = rs->md.private;
-
- /* convert from kiB to sectors */
- DMEMIT(" stripe_cache %d",
- conf ? conf->max_nr_stripes * 2 : 0);
- }
-
- if (_test_flag(CTR_FLAG_REGION_SIZE, rs->ctr_flags))
- DMEMIT(" region_size %lu",
- rs->md.bitmap_info.chunksize >> 9);
-
- if (_test_flag(CTR_FLAG_RAID10_COPIES, rs->ctr_flags))
- DMEMIT(" raid10_copies %u",
- raid10_md_layout_to_copies(rs->md.layout));
-
- if (_test_flag(CTR_FLAG_RAID10_FORMAT, rs->ctr_flags))
- DMEMIT(" raid10_format %s",
- raid10_md_layout_to_format(rs->md.layout));
-
- DMEMIT(" %d", rs->md.raid_disks);
- for (i = 0; i < rs->md.raid_disks; i++) {
- if (rs->dev[i].meta_dev)
- DMEMIT(" %s", rs->dev[i].meta_dev->name);
- else
- DMEMIT(" -");
-
- if (rs->dev[i].data_dev)
- DMEMIT(" %s", rs->dev[i].data_dev->name);
- else
- DMEMIT(" -");
+ DMEMIT(" %s %lu", _argname_by_flag(CTR_FLAG_MAX_WRITE_BEHIND),
+ mddev->bitmap_info.max_write_behind);
+ if (_test_flag(CTR_FLAG_MAX_RECOVERY_RATE, rs->ctr_flags))
+ DMEMIT(" %s %d", _argname_by_flag(CTR_FLAG_MAX_RECOVERY_RATE),
+ mddev->sync_speed_max);
+ if (_test_flag(CTR_FLAG_MIN_RECOVERY_RATE, rs->ctr_flags))
+ DMEMIT(" %s %d", _argname_by_flag(CTR_FLAG_MIN_RECOVERY_RATE),
+ mddev->sync_speed_min);
+ DMEMIT(" %d", rs->raid_disks);
+ rdev_for_each(rdev, mddev) {
+ struct raid_dev *rd = container_of(rdev, struct raid_dev, rdev);
+
+ DMEMIT(" %s %s", _get_dev_name(rd->meta_dev),
+ _get_dev_name(rd->data_dev));
}
}
}
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return -EBUSY;
else if (!strcasecmp(argv[0], "resync"))
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- else if (!strcasecmp(argv[0], "recover")) {
+ ; /* MD_RECOVERY_NEEDED set below */
+ else if (!strcasecmp(argv[0], "recover"))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- } else {
+ else {
if (!strcasecmp(argv[0], "check"))
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
else if (!!strcasecmp(argv[0], "repair"))
* canceling read-auto mode
*/
mddev->ro = 0;
- if (!mddev->suspended)
+ if (!mddev->suspended && mddev->sync_thread)
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- if (!mddev->suspended)
+ if (!mddev->suspended && mddev->thread)
md_wakeup_thread(mddev->thread);
return 0;
* devices are reachable again.
*/
attempt_restore_of_faulty_devices(rs);
-
- } else {
- mddev->in_sync = 0;
-
- /*
- * If any of the constructor flags got passed in
- * but "region_size" (gets always passed in for
- * mappings with bitmap), we expect userspace to
- * reset them and reload the mapping anyway.
- *
- * -> don't unfreeze resynchronization until imminant
- * reload of the table w/o theses flags
- */
- if (!_test_flags(ALL_FREEZE_FLAGS, rs->ctr_flags))
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
mddev->ro = 0;
+ mddev->in_sync = 0;
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
if (mddev->suspended)
mddev_resume(mddev);
}
MODULE_ALIAS("dm-raid5");
MODULE_ALIAS("dm-raid6");
MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>");
+MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");