When DLPAR adding or removing memory we need to check the device
offline status before trying to online/offline the memory. This is
needed because calls to device_online() and device_offline() will
return non-zero for memory that is already online and offline
respectively.
This update resolves two scenarios. First, for a kernel built with
auto-online memory enabled (CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y),
memory will be onlined as part of calls to add_memory(). After adding
the memory the pseries DLPAR code tries to online it and fails since
the memory is already online. The DLPAR code then tries to remove the
memory which produces the oops message below because the memory is not
offline.
The second scenario occurs when removing memory that is already
offline, i.e. marking memory offline (via sysfs) and then trying to
remove that memory. This doesn't work because offlining the already
offline memory does not succeed and the DLPAR code then fails the
DLPAR remove operation.
The fix for both scenarios is to check the device.offline status
before making the calls to device_online() or device_offline().
kernel BUG at mm/memory_hotplug.c:1936!
...
NIP [
c0000000002ca428] .remove_memory+0xb8/0xc0
LR [
c0000000002ca3cc] .remove_memory+0x5c/0xc0
Call Trace:
.remove_memory+0x5c/0xc0 (unreliable)
.dlpar_add_lmb+0x384/0x400
.dlpar_memory+0x5dc/0xca0
.handle_dlpar_errorlog+0x74/0xe0
.pseries_hp_work_fn+0x2c/0x90
.process_one_work+0x17c/0x460
.worker_thread+0x88/0x500
.kthread+0x15c/0x1a0
.ret_from_kernel_thread+0x58/0xc0
Fixes: 011dfa2b77da ("powerpc/pseries: Revert 'Auto-online hotplugged memory'")
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
[mpe: Use bool, add explicit rc=0 case, change log typos & formatting]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
return mem_block;
}
+static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
+{
+ struct memory_block *mem_block;
+ int rc;
+
+ mem_block = lmb_to_memblock(lmb);
+ if (!mem_block)
+ return -EINVAL;
+
+ if (online && mem_block->dev.offline)
+ rc = device_online(&mem_block->dev);
+ else if (!online && !mem_block->dev.offline)
+ rc = device_offline(&mem_block->dev);
+ else
+ rc = 0;
+
+ put_device(&mem_block->dev);
+
+ return rc;
+}
+
+static int dlpar_online_lmb(struct of_drconf_cell *lmb)
+{
+ return dlpar_change_lmb_state(lmb, true);
+}
+
#ifdef CONFIG_MEMORY_HOTREMOVE
+static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
+{
+ return dlpar_change_lmb_state(lmb, false);
+}
+
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
unsigned long block_sz, start_pfn;
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
{
- struct memory_block *mem_block;
unsigned long block_sz;
int nid, rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
- mem_block = lmb_to_memblock(lmb);
- if (!mem_block)
- return -EINVAL;
-
- rc = device_offline(&mem_block->dev);
- put_device(&mem_block->dev);
+ rc = dlpar_offline_lmb(lmb);
if (rc)
return rc;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static int dlpar_online_lmb(struct of_drconf_cell *lmb)
-{
- struct memory_block *mem_block;
- int rc;
-
- mem_block = lmb_to_memblock(lmb);
- if (!mem_block)
- return -EINVAL;
-
- rc = device_online(&mem_block->dev);
- put_device(&mem_block->dev);
- return rc;
-}
-
static int dlpar_add_lmb(struct of_drconf_cell *lmb)
{
unsigned long block_sz;