fadump: Register for firmware assisted dump.
author Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Mon, 20 Feb 2012 02:15:03 +0000 (02:15 +0000)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 22 Feb 2012 23:50:01 +0000 (10:50 +1100)
On 2012-02-20 11:02:51 Mon, Paul Mackerras wrote:
> On Thu, Feb 16, 2012 at 04:44:30PM +0530, Mahesh J Salgaonkar wrote:
>
> If I have read the code correctly, we are going to get this printk on
> non-pSeries machines or on older pSeries machines, even if the user
> has not put the fadump=on option on the kernel command line.  The
> printk will be annoying since there is no actual error condition.  It
> seems to me that the condition for the printk should include
> fw_dump.fadump_enabled.  In other words you should probably add
>
>  if (!fw_dump.fadump_enabled)
>  return 0;
>
> at the beginning of the function.

Hi Paul,

Thanks for pointing it out. Please find the updated patch below.

The existing patches above this one (4/10 through 10/10) apply cleanly
on top of this update.

Thanks,
-Mahesh.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/include/asm/fadump.h
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/iommu.c
arch/powerpc/mm/hash_utils_64.c

index 7be25d3..bbaf278 100644 (file)
 #define FADUMP_HPTE_REGION     0x0002
 #define FADUMP_REAL_MODE_REGION        0x0011
 
+/* Dump request flag */
+#define FADUMP_REQUEST_FLAG    0x00000001
+
+/* FAD commands */
+#define FADUMP_REGISTER                1
+#define FADUMP_UNREGISTER      2
+#define FADUMP_INVALIDATE      3
+
+/*
+ * Kernel Dump section info.
+ *
+ * Describes one region of a dump request; three of these are embedded in
+ * struct fadump_mem_struct (CPU state data, HPTE region, real mode region).
+ */
+struct fadump_section {
+       /* Set to FADUMP_REQUEST_FLAG by init_fadump_mem_struct(). */
+       u32     request_flag;
+       /* Region type, e.g. FADUMP_HPTE_REGION or FADUMP_REAL_MODE_REGION. */
+       u16     source_data_type;
+       /* NOTE(review): never written by the kernel code here; presumably
+        * filled in by firmware - confirm against the platform spec. */
+       u16     error_flags;
+       /* Start of the region to be dumped. */
+       u64     source_address;
+       /* Length of the region to be dumped, in bytes. */
+       u64     source_len;
+       /* Reported as "Dumped:" by the fadump_region debugfs file.
+        * NOTE(review): presumably updated by firmware - confirm. */
+       u64     bytes_dumped;
+       /* Address inside the reserved dump area where the data is placed. */
+       u64     destination_address;
+};
+
+/*
+ * ibm,configure-kernel-dump header.
+ *
+ * Leading part of struct fadump_mem_struct; populated by
+ * init_fadump_mem_struct() before the structure is handed to rtas.
+ */
+struct fadump_section_header {
+       /* init_fadump_mem_struct() sets this to 0x00000001. */
+       u32     dump_format_version;
+       /* Number of struct fadump_section entries that follow (3 here). */
+       u16     dump_num_sections;
+       /* Initialised to 0 by the kernel. */
+       u16     dump_status_flag;
+       /* Byte offset of the first section from the start of the structure. */
+       u32     offset_first_dump_section;
+
+       /* Fields for disk dump option. Unused: all are left at 0. */
+       u32     dd_block_size;
+       u64     dd_block_offset;
+       u64     dd_num_blocks;
+       u32     dd_offset_disk_path;
+
+       /*
+        * Maximum time allowed to prevent an automatic dump-reboot.
+        * The kernel sets this to 0 to disable an automatic dump-reboot.
+        */
+       u32     max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ *
+ * The section order below is also the order in which
+ * init_fadump_mem_struct() lays the destinations out in the reserved area.
+ */
+struct fadump_mem_struct {
+       struct fadump_section_header    header;
+
+       /* Kernel dump sections */
+       /* CPU state data; header.offset_first_dump_section points here. */
+       struct fadump_section           cpu_state_data;
+       /* Hash page table entry region. */
+       struct fadump_section           hpte_region;
+       /* Real mode region: boot memory starting at RMA_START. */
+       struct fadump_section           rmr_region;
+};
+
+/* Firmware-assisted dump configuration details. */
 struct fw_dump {
        unsigned long   cpu_state_data_size;
        unsigned long   hpte_region_size;
@@ -62,10 +114,15 @@ struct fw_dump {
        unsigned long   fadump_enabled:1;
        unsigned long   fadump_supported:1;
        unsigned long   dump_active:1;
+       unsigned long   dump_registered:1;
 };
 
 extern int early_init_dt_scan_fw_dump(unsigned long node,
                const char *uname, int depth, void *data);
 extern int fadump_reserve_mem(void);
+extern int setup_fadump(void);
+extern int is_fadump_active(void);
+#else  /* CONFIG_FA_DUMP */
+static inline int is_fadump_active(void) { return 0; }
 #endif
 #endif
index deb276a..eb8f782 100644 (file)
@@ -29,6 +29,9 @@
 
 #include <linux/string.h>
 #include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <asm/page.h>
 #include <asm/prom.h>
 #include <asm/fadump.h>
 
 static struct fw_dump fw_dump;
+static struct fadump_mem_struct fdm;
+static const struct fadump_mem_struct *fdm_active;
+
+static DEFINE_MUTEX(fadump_mutex);
 
 /* Scan the Firmware Assisted dump configuration details. */
 int __init early_init_dt_scan_fw_dump(unsigned long node,
@@ -64,7 +71,8 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
         * The 'ibm,kernel-dump' rtas node is present only if there is
         * dump data waiting for us.
         */
-       if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL))
+       fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+       if (fdm_active)
                fw_dump.dump_active = 1;
 
        /* Get the sizes required to store dump data for the firmware provided
@@ -98,6 +106,85 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
        return 1;
 }
 
+/*
+ * Tell callers whether a firmware-assisted dump is active, i.e. whether the
+ * "ibm,kernel-dump" property was found during the early device tree scan.
+ * Returns 1 if active, 0 otherwise.
+ */
+int is_fadump_active(void)
+{
+       return fw_dump.dump_active ? 1 : 0;
+}
+
+/*
+ * Emit the firmware assisted dump configuration through pr_debug().
+ * When the platform lacks fadump support only the support line is printed.
+ */
+static void fadump_show_config(void)
+{
+       const char *support = fw_dump.fadump_supported ?
+                                       "present" : "no support";
+       const char *enabled = fw_dump.fadump_enabled ? "yes" : "no";
+       const char *active = fw_dump.dump_active ? "yes" : "no";
+
+       pr_debug("Support for firmware-assisted dump (fadump): %s\n",
+                       support);
+
+       if (fw_dump.fadump_supported) {
+               pr_debug("Fadump enabled    : %s\n", enabled);
+               pr_debug("Dump Active       : %s\n", active);
+               pr_debug("Dump section sizes:\n");
+               pr_debug("    CPU state data size: %lx\n",
+                               fw_dump.cpu_state_data_size);
+               pr_debug("    HPTE region size   : %lx\n",
+                               fw_dump.hpte_region_size);
+               pr_debug("Boot memory size  : %lx\n",
+                               fw_dump.boot_memory_size);
+       }
+}
+
+/*
+ * Fill in the dump memory structure used for registration.
+ *
+ * @fdm:  structure to initialise; nothing is done when it is NULL.
+ * @addr: start of the destination area, rounded down to a page boundary.
+ *
+ * The three sections are placed back to back starting at @addr, in the
+ * order CPU state data, HPTE region, real mode region.
+ *
+ * Returns the address just past the last section, or 0 if @fdm is NULL.
+ */
+static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
+                               unsigned long addr)
+{
+       struct fadump_section_header *hdr;
+       struct fadump_section *sec;
+
+       if (!fdm)
+               return 0;
+
+       memset(fdm, 0, sizeof(*fdm));
+       addr &= PAGE_MASK;
+
+       hdr = &fdm->header;
+       hdr->dump_format_version = 0x00000001;
+       hdr->dump_num_sections = 3;
+       hdr->dump_status_flag = 0;
+       hdr->offset_first_dump_section =
+               (u32)offsetof(struct fadump_mem_struct, cpu_state_data);
+
+       /* The disk dump option is not used, so its fields stay at 0. */
+       hdr->dd_block_size = 0;
+       hdr->dd_block_offset = 0;
+       hdr->dd_num_blocks = 0;
+       hdr->dd_offset_disk_path = 0;
+
+       /* A value of 0 disables an automatic dump-reboot. */
+       hdr->max_time_auto = 0;
+
+       /* Section 1: cpu state data. */
+       sec = &fdm->cpu_state_data;
+       sec->request_flag = FADUMP_REQUEST_FLAG;
+       sec->source_data_type = FADUMP_CPU_STATE_DATA;
+       sec->source_address = 0;
+       sec->source_len = fw_dump.cpu_state_data_size;
+       sec->destination_address = addr;
+       addr += fw_dump.cpu_state_data_size;
+
+       /* Section 2: hpte region. */
+       sec = &fdm->hpte_region;
+       sec->request_flag = FADUMP_REQUEST_FLAG;
+       sec->source_data_type = FADUMP_HPTE_REGION;
+       sec->source_address = 0;
+       sec->source_len = fw_dump.hpte_region_size;
+       sec->destination_address = addr;
+       addr += fw_dump.hpte_region_size;
+
+       /* Section 3: RMA (boot memory) region. */
+       sec = &fdm->rmr_region;
+       sec->request_flag = FADUMP_REQUEST_FLAG;
+       sec->source_data_type = FADUMP_REAL_MODE_REGION;
+       sec->source_address = RMA_START;
+       sec->source_len = fw_dump.boot_memory_size;
+       sec->destination_address = addr;
+       addr += fw_dump.boot_memory_size;
+
+       return addr;
+}
+
 /**
  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
  *
@@ -166,8 +253,15 @@ int __init fadump_reserve_mem(void)
                fw_dump.fadump_enabled = 0;
                return 0;
        }
-       /* Initialize boot memory size */
-       fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+       /*
+        * Initialize boot memory size
+        * If dump is active then we have already calculated the size during
+        * first kernel.
+        */
+       if (fdm_active)
+               fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
+       else
+               fw_dump.boot_memory_size = fadump_calculate_reserve_size();
 
        /*
         * Calculate the memory boundary.
@@ -244,3 +338,258 @@ static int __init early_fadump_reserve_mem(char *p)
        return 0;
 }
 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
+
+/*
+ * Register the given dump memory structure with firmware.
+ *
+ * @fdm: structure describing the dump sections to register.
+ *
+ * Issues the ibm,configure-kernel-dump rtas call with FADUMP_REGISTER and
+ * repeats it for as long as rtas_busy_delay_time() reports a non-zero wait
+ * for the returned status. On success, or when firmware reports that a dump
+ * is already registered (-9), fw_dump.dump_registered is set. The outcome is
+ * only logged; nothing is returned to the caller.
+ */
+static void register_fw_dump(struct fadump_mem_struct *fdm)
+{
+       int rc;
+       unsigned int wait_time;
+
+       pr_debug("Registering for firmware-assisted kernel dump...\n");
+
+       /* TODO: Add upper time limit for the delay */
+       do {
+               rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+                       FADUMP_REGISTER, fdm,
+                       sizeof(struct fadump_mem_struct));
+
+               wait_time = rtas_busy_delay_time(rc);
+               if (wait_time)
+                       mdelay(wait_time);
+
+       } while (wait_time);
+
+       switch (rc) {
+       case -1:
+               printk(KERN_ERR "Failed to register firmware-assisted kernel"
+                       " dump. Hardware Error(%d).\n", rc);
+               break;
+       case -3:
+               printk(KERN_ERR "Failed to register firmware-assisted kernel"
+                       " dump. Parameter Error(%d).\n", rc);
+               break;
+       case -9:
+               /* Terminate the line so the next log message is not glued on. */
+               printk(KERN_ERR "firmware-assisted kernel dump is already"
+                       " registered.\n");
+               fw_dump.dump_registered = 1;
+               break;
+       case 0:
+               printk(KERN_INFO "firmware-assisted kernel dump registration"
+                       " is successful\n");
+               fw_dump.dump_registered = 1;
+               break;
+       default:
+               /* Do not let an unrecognised firmware status pass silently. */
+               printk(KERN_ERR "Failed to register firmware-assisted kernel"
+                       " dump. Unexpected Error(%d).\n", rc);
+               break;
+       }
+}
+
+/*
+ * Register the file-scope dump structure with firmware, provided a dump
+ * area has been reserved. Without reserved memory there is nothing to
+ * register and the call is a no-op.
+ */
+static void register_fadump(void)
+{
+       if (fw_dump.reserve_dump_area_size) {
+               /* register the future kernel dump with firmware. */
+               register_fw_dump(&fdm);
+       }
+}
+
+/*
+ * Ask firmware to drop a previously registered dump.
+ *
+ * @fdm: the dump memory structure that was registered.
+ *
+ * The rtas call is repeated for as long as rtas_busy_delay_time() reports
+ * a non-zero wait for the returned status.
+ *
+ * Returns 0 and clears fw_dump.dump_registered on success; otherwise logs
+ * the failure and returns the rtas status unchanged.
+ */
+static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
+{
+       unsigned int delay_ms;
+       int status;
+
+       pr_debug("Un-register firmware-assisted dump\n");
+
+       /* TODO: Add upper time limit for the delay */
+       for (;;) {
+               status = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1,
+                               NULL, FADUMP_UNREGISTER, fdm,
+                               sizeof(struct fadump_mem_struct));
+
+               delay_ms = rtas_busy_delay_time(status);
+               if (!delay_ms)
+                       break;
+               mdelay(delay_ms);
+       }
+
+       if (status == 0) {
+               fw_dump.dump_registered = 0;
+               return 0;
+       }
+
+       printk(KERN_ERR "Failed to un-register firmware-assisted dump."
+               " unexpected error(%d).\n", status);
+       return status;
+}
+
+/* sysfs show handler: writes fw_dump.fadump_enabled (0 or 1) plus newline. */
+static ssize_t fadump_enabled_show(struct kobject *kobj,
+                                       struct kobj_attribute *attr,
+                                       char *buf)
+{
+       int enabled = fw_dump.fadump_enabled;
+
+       return sprintf(buf, "%d\n", enabled);
+}
+
+/* sysfs show handler: writes fw_dump.dump_registered (0 or 1) plus newline. */
+static ssize_t fadump_register_show(struct kobject *kobj,
+                                       struct kobj_attribute *attr,
+                                       char *buf)
+{
+       int registered = fw_dump.dump_registered;
+
+       return sprintf(buf, "%d\n", registered);
+}
+
+/*
+ * sysfs store handler for fadump_registered.
+ *
+ * Writing '0' un-registers the dump, writing '1' registers it; only the
+ * first byte of @buf is examined. Runs under fadump_mutex.
+ *
+ * Returns @count on success, or:
+ *   -EPERM  if fadump is not enabled or a dump is currently active,
+ *   -EINVAL if the input is not '0'/'1' or the requested state is already
+ *           in effect,
+ *   -EIO    if the firmware operation did not take effect.
+ */
+static ssize_t fadump_register_store(struct kobject *kobj,
+                                       struct kobj_attribute *attr,
+                                       const char *buf, size_t count)
+{
+       int ret = 0;
+
+       if (!fw_dump.fadump_enabled || fdm_active)
+               return -EPERM;
+
+       mutex_lock(&fadump_mutex);
+
+       switch (buf[0]) {
+       case '0':
+               if (fw_dump.dump_registered == 0) {
+                       ret = -EINVAL;
+                       goto unlock_out;
+               }
+               /*
+                * Un-register Firmware-assisted dump. The rtas status is not
+                * an errno, so report any failure to user space as -EIO
+                * instead of pretending the write succeeded.
+                */
+               if (fadump_unregister_dump(&fdm))
+                       ret = -EIO;
+               break;
+       case '1':
+               if (fw_dump.dump_registered == 1) {
+                       ret = -EINVAL;
+                       goto unlock_out;
+               }
+               /*
+                * Register Firmware-assisted dump. register_fadump() has no
+                * return value; dump_registered tells us whether it worked.
+                */
+               register_fadump();
+               if (!fw_dump.dump_registered)
+                       ret = -EIO;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+unlock_out:
+       mutex_unlock(&fadump_mutex);
+       return ret < 0 ? ret : count;
+}
+
+/*
+ * seq_file show routine behind the fadump_region debugfs file.
+ *
+ * Prints destination range, length and dumped byte count for the CPU state,
+ * HPTE and real mode (DUMP) sections. The data comes from the structure
+ * handed over by firmware when a dump is active, otherwise from the
+ * kernel's own registration structure. With an active dump whose first
+ * destination does not coincide with the start of the reserved area, the
+ * gap in front of it is printed as a fourth line.
+ *
+ * Always returns 0; prints nothing if fadump is not enabled.
+ */
+static int fadump_region_show(struct seq_file *m, void *private)
+{
+       const struct fadump_mem_struct *fdm_ptr = &fdm;
+       const struct fadump_section *cpu, *hpte, *rmr;
+
+       if (!fw_dump.fadump_enabled)
+               return 0;
+
+       if (fdm_active)
+               fdm_ptr = fdm_active;
+
+       cpu = &fdm_ptr->cpu_state_data;
+       hpte = &fdm_ptr->hpte_region;
+       rmr = &fdm_ptr->rmr_region;
+
+       seq_printf(m,
+                       "CPU : [%#016llx-%#016llx] %#llx bytes, "
+                       "Dumped: %#llx\n",
+                       cpu->destination_address,
+                       cpu->destination_address + cpu->source_len - 1,
+                       cpu->source_len,
+                       cpu->bytes_dumped);
+       seq_printf(m,
+                       "HPTE: [%#016llx-%#016llx] %#llx bytes, "
+                       "Dumped: %#llx\n",
+                       hpte->destination_address,
+                       hpte->destination_address + hpte->source_len - 1,
+                       hpte->source_len,
+                       hpte->bytes_dumped);
+       seq_printf(m,
+                       "DUMP: [%#016llx-%#016llx] %#llx bytes, "
+                       "Dumped: %#llx\n",
+                       rmr->destination_address,
+                       rmr->destination_address + rmr->source_len - 1,
+                       rmr->source_len,
+                       rmr->bytes_dumped);
+
+       /* Dump is active. Show reserved memory region. */
+       if (fdm_active &&
+               fw_dump.reserve_dump_area_start != cpu->destination_address)
+               seq_printf(m,
+                       "    : [%#016llx-%#016llx] %#llx bytes, "
+                       "Dumped: %#llx\n",
+                       (unsigned long long)fw_dump.reserve_dump_area_start,
+                       cpu->destination_address - 1,
+                       cpu->destination_address -
+                       fw_dump.reserve_dump_area_start,
+                       cpu->destination_address -
+                       fw_dump.reserve_dump_area_start);
+
+       return 0;
+}
+
+/* Read-only (0444) attribute "fadump_enabled": reports fw_dump.fadump_enabled. */
+static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
+                                               0444, fadump_enabled_show,
+                                               NULL);
+/*
+ * Read-write (0644) attribute "fadump_registered": reading reports
+ * fw_dump.dump_registered, writing '0'/'1' un-registers/registers the dump.
+ */
+static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
+                                               0644, fadump_register_show,
+                                               fadump_register_store);
+
+/* open() for the fadump_region file: bind fadump_region_show via single_open(). */
+static int fadump_region_open(struct inode *inode, struct file *file)
+{
+       void *data = inode->i_private;
+
+       return single_open(file, fadump_region_show, data);
+}
+
+/*
+ * File operations for the "fadump_region" debugfs file: a single-record
+ * seq_file whose content is produced by fadump_region_show().
+ */
+static const struct file_operations fadump_region_fops = {
+       .open    = fadump_region_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * Create the user-visible fadump files: the fadump_enabled and
+ * fadump_registered attributes under kernel_kobj, and the fadump_region
+ * file under powerpc_debugfs_root. A failure is logged and the remaining
+ * files are still attempted.
+ */
+static void fadump_init_files(void)
+{
+       struct dentry *region_file;
+       int err;
+
+       err = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
+       if (err != 0)
+               printk(KERN_ERR "fadump: unable to create sysfs file"
+                       " fadump_enabled (%d)\n", err);
+
+       err = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
+       if (err != 0)
+               printk(KERN_ERR "fadump: unable to create sysfs file"
+                       " fadump_registered (%d)\n", err);
+
+       region_file = debugfs_create_file("fadump_region", 0444,
+                                       powerpc_debugfs_root, NULL,
+                                       &fadump_region_fops);
+       if (region_file == NULL)
+               printk(KERN_ERR "fadump: unable to create debugfs file"
+                               " fadump_region\n");
+}
+
+/*
+ * Prepare for firmware-assisted dump.
+ *
+ * Does nothing unless fadump was enabled. If the platform does not support
+ * it, an error is logged. Otherwise the configuration is printed, the dump
+ * memory structure is initialised when a dump area was reserved, and the
+ * sysfs/debugfs files are created.
+ *
+ * Returns 1 when fadump was set up, 0 otherwise.
+ */
+int __init setup_fadump(void)
+{
+       if (!fw_dump.fadump_enabled)
+               return 0;
+
+       if (fw_dump.fadump_supported) {
+               fadump_show_config();
+
+               /* Initialize the kernel dump memory structure for FAD registration. */
+               if (fw_dump.reserve_dump_area_size)
+                       init_fadump_mem_struct(&fdm,
+                                       fw_dump.reserve_dump_area_start);
+
+               fadump_init_files();
+               return 1;
+       }
+
+       printk(KERN_ERR "Firmware-assisted dump is not supported on"
+               " this hardware\n");
+       return 0;
+}
+subsys_initcall(setup_fadump);
index 0cfcf98..359f078 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
 #include <asm/kdump.h>
+#include <asm/fadump.h>
 
 #define DBG(...)
 
@@ -445,7 +446,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 
 static void iommu_table_clear(struct iommu_table *tbl)
 {
-       if (!is_kdump_kernel()) {
+       /*
+        * In case of firmware assisted dump system goes through clean
+        * reboot process at the time of system crash. Hence it's safe to
+        * clear the TCE entries if firmware assisted dump is active.
+        */
+       if (!is_kdump_kernel() || is_fadump_active()) {
                /* Clear the table in case firmware left allocations in it */
                ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
                return;
index 2d28218..b534bba 100644 (file)
@@ -55,6 +55,7 @@
 #include <asm/spu.h>
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
+#include <asm/fadump.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -625,6 +626,16 @@ static void __init htab_initialize(void)
                /* Using a hypervisor which owns the htab */
                htab_address = NULL;
                _SDR1 = 0; 
+#ifdef CONFIG_FA_DUMP
+               /*
+                * If firmware assisted dump is active firmware preserves
+                * the contents of htab along with entire partition memory.
+                * Clear the htab if firmware assisted dump is active so
+                * that we dont end up using old mappings.
+                */
+               if (is_fadump_active() && ppc_md.hpte_clear_all)
+                       ppc_md.hpte_clear_all();
+#endif
        } else {
                /* Find storage for the HPT.  Must be contiguous in
                 * the absolute address space. On cell we want it to be