Merge git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 30 May 2012 01:32:37 +0000 (18:32 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 30 May 2012 01:32:37 +0000 (18:32 -0700)
Pull EDAC internal API changes from Mauro Carvalho Chehab:
 "This changeset is the first part of a series of patches that fixes the
  EDAC subsystem.  On this set, it changes the Kernel EDAC API in order
  to properly represent the Intel i3/i5/i7, Xeon 3xxx/5xxx/7xxx, and
  Intel E5-xxxx memory controllers.

  The EDAC core used to assume that:

       - the DRAM chip select pin is directly accessed by the memory
         controller

       - when multiple channels are used, they're all filled with the
         same type of memory.

  None of the above premises has been true on Intel memory controllers
  since 2002, when RAMBUS and FB-DIMMs were introduced: the Advanced
  Memory Buffer, or some similar technology, hides the direct access to
  the DRAM pins.

  So, the existing drivers for those chipsets had to lie to the EDAC
  core, in general telling that just one channel is filled.  That
  produces some hard to understand error messages like:

       EDAC MC0: CE row 3, channel 0, label "DIMM1": 1 Unknown error(s): memory read error on FATAL area : cpu=0 Err=0008:00c2 (ch=2), addr = 0xad1f73480 => socket=0, Channel=0(mask=2), rank=1

  The location information there (row3 channel 0) is completely bogus:
  it has no physical meaning; they are just some random values that the
  driver uses to talk with the EDAC core.  The error actually happened
  at CPU socket 0, channel 0, slot 1, but this is not reported anywhere,
  as the EDAC core doesn't know anything about the memory layout.  So,
  only advanced users that know how the EDAC driver works and that tests
  their systems to see how DIMMs are mapped can actually benefit from
  such error logs.

  This patch series fixes the error report logic, in order to allow the
  EDAC to expose the memory architecture used by them to the EDAC core.
  So, as the EDAC core now understands how the memory is organized, it
  can provide a useful report:

       EDAC MC0: CE memory read error on DIMM1 (channel:0 slot:1 page:0x364b1b offset:0x600 grain:32 syndrome:0x0 - count:1 area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:4)

  The location of the DIMM where the error happened is reported by "MC0"
  (cpu socket #0), at "channel:0 slot:1" location, and matches the
  physical location of the DIMM.

  There are two remaining issues not covered by this patch series:

       - The EDAC sysfs API will still report bogus values.  So,
         userspace tools like edac-utils will still use the bogus data;

       - Add a new tracepoint-based way to get the binary information
         about the errors.

  Those are on a second series of patches (also at -next), but will
  probably miss the train for 3.5, due to the slow review process."

Fix up trivial conflict (due to spelling correction of removed code) in
drivers/edac/edac_device.c

* git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (42 commits)
  i7core: fix ranks information at the per-channel struct
  i5000: Fix the fatal error handling
  i5100_edac: Fix a warning when compiled with 32 bits
  i82975x_edac: Test nr_pages earlier to save a few CPU cycles
  e752x_edac: provide more info about how DIMMS/ranks are mapped
  i5000_edac: Fix the logic that retrieves memory information
  i5400_edac: improve debug messages to better represent the filled memory
  edac: Cleanup the logs for i7core and sb edac drivers
  edac: Initialize the dimm label with the known information
  edac: Remove the legacy EDAC ABI
  x38_edac: convert driver to use the new edac ABI
  tile_edac: convert driver to use the new edac ABI
  sb_edac: convert driver to use the new edac ABI
  r82600_edac: convert driver to use the new edac ABI
  ppc4xx_edac: convert driver to use the new edac ABI
  pasemi_edac: convert driver to use the new edac ABI
  mv64x60_edac: convert driver to use the new edac ABI
  mpc85xx_edac: convert driver to use the new edac ABI
  i82975x_edac: convert driver to use the new edac ABI
  i82875p_edac: convert driver to use the new edac ABI
  ...

1  2 
drivers/edac/edac_core.h
drivers/edac/edac_device.c
drivers/edac/i7core_edac.c
drivers/edac/sb_edac.c

diff --combined drivers/edac/edac_core.h
@@@ -107,13 -107,13 +107,13 @@@ extern int edac_debug_level
   *
   * CPU caches (L1 and L2)
   * DMA engines
 - * Core CPU swithces
 + * Core CPU switches
   * Fabric switch units
   * PCIe interface controllers
   * other EDAC/ECC type devices that can be monitored for
   * errors, etc.
   *
 - * It allows for a 2 level set of hiearchry. For example:
 + * It allows for a 2 level set of hierarchy. For example:
   *
   * cache could be composed of L1, L2 and L3 levels of cache.
   * Each CPU core would have its own L1 cache, while sharing
@@@ -447,8 -447,10 +447,10 @@@ static inline void pci_write_bits32(str
  
  #endif                                /* CONFIG_PCI */
  
- extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
-                                         unsigned nr_chans, int edac_index);
+ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
+                                  unsigned n_layers,
+                                  struct edac_mc_layer *layers,
+                                  unsigned sz_pvt);
  extern int edac_mc_add_mc(struct mem_ctl_info *mci);
  extern void edac_mc_free(struct mem_ctl_info *mci);
  extern struct mem_ctl_info *edac_mc_find(int idx);
@@@ -456,35 -458,17 +458,17 @@@ extern struct mem_ctl_info *find_mci_by
  extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
  extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
                                      unsigned long page);
- /*
-  * The no info errors are used when error overflows are reported.
-  * There are a limited number of error logging registers that can
-  * be exausted.  When all registers are exhausted and an additional
-  * error occurs then an error overflow register records that an
-  * error occurred and the type of error, but doesn't have any
-  * further information.  The ce/ue versions make for cleaner
-  * reporting logic and function interface - reduces conditional
-  * statement clutter and extra function arguments.
-  */
- extern void edac_mc_handle_ce(struct mem_ctl_info *mci,
-                             unsigned long page_frame_number,
-                             unsigned long offset_in_page,
-                             unsigned long syndrome, int row, int channel,
-                             const char *msg);
- extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
-                                     const char *msg);
- extern void edac_mc_handle_ue(struct mem_ctl_info *mci,
-                             unsigned long page_frame_number,
-                             unsigned long offset_in_page, int row,
-                             const char *msg);
- extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
-                                     const char *msg);
- extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow,
-                                 unsigned int channel0, unsigned int channel1,
-                                 char *msg);
- extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow,
-                                 unsigned int channel, char *msg);
+ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
+                         struct mem_ctl_info *mci,
+                         const unsigned long page_frame_number,
+                         const unsigned long offset_in_page,
+                         const unsigned long syndrome,
+                         const int layer0,
+                         const int layer1,
+                         const int layer2,
+                         const char *msg,
+                         const char *other_detail,
+                         const void *mcelog);
  
  /*
   * edac_device APIs
@@@ -496,6 -480,7 +480,7 @@@ extern void edac_device_handle_ue(struc
  extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
                                int inst_nr, int block_nr, const char *msg);
  extern int edac_device_alloc_index(void);
+ extern const char *edac_layer_name[];
  
  /*
   * edac_pci APIs
@@@ -56,7 -56,7 +56,7 @@@ static void edac_device_dump_device(str
   *
   *    The control structure is allocated in complete chunk
   *    from the OS. It is in turn sub allocated to the
 - *    various objects that compose the struture
 + *    various objects that compose the structure
   *
   *    The structure has a 'nr_instance' array within itself.
   *    Each instance represents a major component
@@@ -79,7 -79,7 +79,7 @@@ struct edac_device_ctl_info *edac_devic
        unsigned total_size;
        unsigned count;
        unsigned instance, block, attr;
-       void *pvt;
+       void *pvt, *p;
        int err;
  
        debugf4("%s() instances=%d blocks=%d\n",
         * to be at least as stringent as what the compiler would
         * provide if we could simply hardcode everything into a single struct.
         */
-       dev_ctl = (struct edac_device_ctl_info *)NULL;
+       p = NULL;
+       dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1);
  
        /* Calc the 'end' offset past end of ONE ctl_info structure
         * which will become the start of the 'instance' array
         */
-       dev_inst = edac_align_ptr(&dev_ctl[1], sizeof(*dev_inst));
+       dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances);
  
        /* Calc the 'end' offset past the instance array within the ctl_info
         * which will become the start of the block array
         */
-       dev_blk = edac_align_ptr(&dev_inst[nr_instances], sizeof(*dev_blk));
+       count = nr_instances * nr_blocks;
+       dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count);
  
        /* Calc the 'end' offset past the dev_blk array
         * which will become the start of the attrib array, if any.
         */
-       count = nr_instances * nr_blocks;
-       dev_attrib = edac_align_ptr(&dev_blk[count], sizeof(*dev_attrib));
-       /* Check for case of when an attribute array is specified */
-       if (nr_attrib > 0) {
-               /* calc how many nr_attrib we need */
+       /* calc how many nr_attrib we need */
+       if (nr_attrib > 0)
                count *= nr_attrib;
+       dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count);
  
-               /* Calc the 'end' offset past the attributes array */
-               pvt = edac_align_ptr(&dev_attrib[count], sz_private);
-       } else {
-               /* no attribute array specified */
-               pvt = edac_align_ptr(dev_attrib, sz_private);
-       }
+       /* Calc the 'end' offset past the attributes array */
+       pvt = edac_align_ptr(&p, sz_private, 1);
  
        /* 'pvt' now points to where the private data area is.
         * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib)
@@@ -394,7 -389,7 +389,7 @@@ static void edac_device_workq_function(
  
        /* Reschedule the workq for the next time period to start again
         * if the number of msec is for 1 sec, then adjust to the next
 -       * whole one second to save timers fireing all over the period
 +       * whole one second to save timers firing all over the period
         * between integral seconds
         */
        if (edac_dev->poll_msec == 1000)
@@@ -563,7 -558,7 +558,7 @@@ EXPORT_SYMBOL_GPL(edac_device_add_devic
   *    Remove sysfs entries for specified edac_device structure and
   *    then remove edac_device structure from global list
   *
 - * @pdev:
 + * @dev:
   *    Pointer to 'struct device' representing edac_device
   *    structure to remove.
   *
@@@ -90,7 -90,7 +90,7 @@@ MODULE_PARM_DESC(use_pci_fixup, "Enabl
  #define MC_MAX_DOD    0x64
  
  /*
 - * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
 + * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
   * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
   */
  
    #define DIMM1_COR_ERR(r)                    (((r) >> 16) & 0x7fff)
    #define DIMM0_COR_ERR(r)                    ((r) & 0x7fff)
  
 -/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
 +/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
  #define MC_SSRCONTROL         0x48
    #define SSR_MODE_DISABLE    0x00
    #define SSR_MODE_ENABLE     0x01
@@@ -221,7 -221,9 +221,9 @@@ struct i7core_inject 
  };
  
  struct i7core_channel {
-       u32             ranks;
+       bool            is_3dimms_present;
+       bool            is_single_4rank;
+       bool            has_4rank;
        u32             dimms;
  };
  
@@@ -257,7 -259,6 +259,6 @@@ struct i7core_pvt 
        struct i7core_channel   channel[NUM_CHANS];
  
        int             ce_count_available;
-       int             csrow_map[NUM_CHANS][MAX_DIMMS];
  
                        /* ECC corrected errors counts per udimm */
        unsigned long   udimm_ce_count[MAX_DIMMS];
@@@ -398,7 -399,7 +399,7 @@@ static DEFINE_PCI_DEVICE_TABLE(i7core_p
  };
  
  /****************************************************************************
 -                      Anciliary status routines
 +                      Ancillary status routines
   ****************************************************************************/
  
        /* MC_CONTROL bits */
@@@ -492,116 -493,15 +493,15 @@@ static void free_i7core_dev(struct i7co
  /****************************************************************************
                        Memory check routines
   ****************************************************************************/
- static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
-                                         unsigned func)
- {
-       struct i7core_dev *i7core_dev = get_i7core_dev(socket);
-       int i;
-       if (!i7core_dev)
-               return NULL;
-       for (i = 0; i < i7core_dev->n_devs; i++) {
-               if (!i7core_dev->pdev[i])
-                       continue;
-               if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
-                   PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
-                       return i7core_dev->pdev[i];
-               }
-       }
-       return NULL;
- }
- /**
-  * i7core_get_active_channels() - gets the number of channels and csrows
-  * @socket:   Quick Path Interconnect socket
-  * @channels: Number of channels that will be returned
-  * @csrows:   Number of csrows found
-  *
-  * Since EDAC core needs to know in advance the number of available channels
-  * and csrows, in order to allocate memory for csrows/channels, it is needed
-  * to run two similar steps. At the first step, implemented on this function,
-  * it checks the number of csrows/channels present at one socket.
-  * this is used in order to properly allocate the size of mci components.
-  *
-  * It should be noticed that none of the current available datasheets explain
-  * or even mention how csrows are seen by the memory controller. So, we need
-  * to add a fake description for csrows.
-  * So, this driver is attributing one DIMM memory for one csrow.
-  */
- static int i7core_get_active_channels(const u8 socket, unsigned *channels,
-                                     unsigned *csrows)
- {
-       struct pci_dev *pdev = NULL;
-       int i, j;
-       u32 status, control;
-       *channels = 0;
-       *csrows = 0;
-       pdev = get_pdev_slot_func(socket, 3, 0);
-       if (!pdev) {
-               i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
-                             socket);
-               return -ENODEV;
-       }
-       /* Device 3 function 0 reads */
-       pci_read_config_dword(pdev, MC_STATUS, &status);
-       pci_read_config_dword(pdev, MC_CONTROL, &control);
-       for (i = 0; i < NUM_CHANS; i++) {
-               u32 dimm_dod[3];
-               /* Check if the channel is active */
-               if (!(control & (1 << (8 + i))))
-                       continue;
-               /* Check if the channel is disabled */
-               if (status & (1 << i))
-                       continue;
-               pdev = get_pdev_slot_func(socket, i + 4, 1);
-               if (!pdev) {
-                       i7core_printk(KERN_ERR, "Couldn't find socket %d "
-                                               "fn %d.%d!!!\n",
-                                               socket, i + 4, 1);
-                       return -ENODEV;
-               }
-               /* Devices 4-6 function 1 */
-               pci_read_config_dword(pdev,
-                               MC_DOD_CH_DIMM0, &dimm_dod[0]);
-               pci_read_config_dword(pdev,
-                               MC_DOD_CH_DIMM1, &dimm_dod[1]);
-               pci_read_config_dword(pdev,
-                               MC_DOD_CH_DIMM2, &dimm_dod[2]);
  
-               (*channels)++;
-               for (j = 0; j < 3; j++) {
-                       if (!DIMM_PRESENT(dimm_dod[j]))
-                               continue;
-                       (*csrows)++;
-               }
-       }
-       debugf0("Number of active channels on socket %d: %d\n",
-               socket, *channels);
-       return 0;
- }
- static int get_dimm_config(const struct mem_ctl_info *mci)
+ static int get_dimm_config(struct mem_ctl_info *mci)
  {
        struct i7core_pvt *pvt = mci->pvt_info;
-       struct csrow_info *csr;
        struct pci_dev *pdev;
        int i, j;
-       int csrow = 0;
-       unsigned long last_page = 0;
        enum edac_type mode;
        enum mem_type mtype;
+       struct dimm_info *dimm;
  
        /* Get data from the MC register, function 0 */
        pdev = pvt->pci_mcr[0];
                pci_read_config_dword(pvt->pci_ch[i][0],
                                MC_CHANNEL_DIMM_INIT_PARAMS, &data);
  
-               pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
-                                               4 : 2;
+               if (data & THREE_DIMMS_PRESENT)
+                       pvt->channel[i].is_3dimms_present = true;
+               if (data & SINGLE_QUAD_RANK_PRESENT)
+                       pvt->channel[i].is_single_4rank = true;
+               if (data & QUAD_RANK_PRESENT)
+                       pvt->channel[i].has_4rank = true;
  
                if (data & REGISTERED_DIMM)
                        mtype = MEM_RDDR3;
                else
                        mtype = MEM_DDR3;
- #if 0
-               if (data & THREE_DIMMS_PRESENT)
-                       pvt->channel[i].dimms = 3;
-               else if (data & SINGLE_QUAD_RANK_PRESENT)
-                       pvt->channel[i].dimms = 1;
-               else
-                       pvt->channel[i].dimms = 2;
- #endif
  
                /* Devices 4-6 function 1 */
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM2, &dimm_dod[2]);
  
                debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
-                       "%d ranks, %cDIMMs\n",
+                       "%s%s%s%cDIMMs\n",
                        i,
                        RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
                        data,
-                       pvt->channel[i].ranks,
+                       pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
+                       pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "",
+                       pvt->channel[i].has_4rank ? "HAS_4R " : "",
                        (data & REGISTERED_DIMM) ? 'R' : 'U');
  
                for (j = 0; j < 3; j++) {
                        if (!DIMM_PRESENT(dimm_dod[j]))
                                continue;
  
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+                                      i, j, 0);
                        banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
                        ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
                        rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
                        /* DDR3 has 8 I/O banks */
                        size = (rows * cols * banks * ranks) >> (20 - 3);
  
-                       pvt->channel[i].dimms++;
                        debugf0("\tdimm %d %d Mb offset: %x, "
                                "bank: %d, rank: %d, row: %#x, col: %#x\n",
                                j, size,
  
                        npages = MiB_TO_PAGES(size);
  
-                       csr = &mci->csrows[csrow];
-                       csr->first_page = last_page + 1;
-                       last_page += npages;
-                       csr->last_page = last_page;
-                       csr->nr_pages = npages;
-                       csr->page_mask = 0;
-                       csr->grain = 8;
-                       csr->csrow_idx = csrow;
-                       csr->nr_channels = 1;
-                       csr->channels[0].chan_idx = i;
-                       csr->channels[0].ce_count = 0;
-                       pvt->csrow_map[i][j] = csrow;
+                       dimm->nr_pages = npages;
  
                        switch (banks) {
                        case 4:
-                               csr->dtype = DEV_X4;
+                               dimm->dtype = DEV_X4;
                                break;
                        case 8:
-                               csr->dtype = DEV_X8;
+                               dimm->dtype = DEV_X8;
                                break;
                        case 16:
-                               csr->dtype = DEV_X16;
+                               dimm->dtype = DEV_X16;
                                break;
                        default:
-                               csr->dtype = DEV_UNKNOWN;
+                               dimm->dtype = DEV_UNKNOWN;
                        }
  
-                       csr->edac_mode = mode;
-                       csr->mtype = mtype;
-                       snprintf(csr->channels[0].label,
-                                       sizeof(csr->channels[0].label),
-                                       "CPU#%uChannel#%u_DIMM#%u",
-                                       pvt->i7core_dev->socket, i, j);
-                       csrow++;
+                       snprintf(dimm->label, sizeof(dimm->label),
+                                "CPU#%uChannel#%u_DIMM#%u",
+                                pvt->i7core_dev->socket, i, j);
+                       dimm->grain = 8;
+                       dimm->edac_mode = mode;
+                       dimm->mtype = mtype;
                }
  
                pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
@@@ -1361,7 -1246,7 +1246,7 @@@ static int i7core_get_onedevice(struct 
                              dev_descr->dev_id, *prev);
  
        /*
 -       * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
 +       * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
         * is at addr 8086:2c40, instead of 8086:2c41. So, we need
         * to probe for the alternate address in case of failure
         */
@@@ -1567,22 -1452,16 +1452,16 @@@ error
  /****************************************************************************
                        Error check routines
   ****************************************************************************/
- static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
+ static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,
                                      const int chan,
                                      const int dimm,
                                      const int add)
  {
-       char *msg;
-       struct i7core_pvt *pvt = mci->pvt_info;
-       int row = pvt->csrow_map[chan][dimm], i;
+       int i;
  
        for (i = 0; i < add; i++) {
-               msg = kasprintf(GFP_KERNEL, "Corrected error "
-                               "(Socket=%d channel=%d dimm=%d)",
-                               pvt->i7core_dev->socket, chan, dimm);
-               edac_mc_handle_fbd_ce(mci, row, 0, msg);
-               kfree (msg);
+               edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
+                                    chan, dimm, -1, "error", "", NULL);
        }
  }
  
@@@ -1623,11 -1502,11 +1502,11 @@@ static void i7core_rdimm_update_ce_coun
  
        /*updated the edac core */
        if (add0 != 0)
-               i7core_rdimm_update_csrow(mci, chan, 0, add0);
+               i7core_rdimm_update_errcount(mci, chan, 0, add0);
        if (add1 != 0)
-               i7core_rdimm_update_csrow(mci, chan, 1, add1);
+               i7core_rdimm_update_errcount(mci, chan, 1, add1);
        if (add2 != 0)
-               i7core_rdimm_update_csrow(mci, chan, 2, add2);
+               i7core_rdimm_update_errcount(mci, chan, 2, add2);
  
  }
  
@@@ -1747,20 -1626,30 +1626,30 @@@ static void i7core_mce_output_error(str
                                    const struct mce *m)
  {
        struct i7core_pvt *pvt = mci->pvt_info;
-       char *type, *optype, *err, *msg;
+       char *type, *optype, *err, msg[80];
+       enum hw_event_mc_err_type tp_event;
        unsigned long error = m->status & 0x1ff0000l;
+       bool uncorrected_error = m->mcgstatus & 1ll << 61;
+       bool ripv = m->mcgstatus & 1;
        u32 optypenum = (m->status >> 4) & 0x07;
        u32 core_err_cnt = (m->status >> 38) & 0x7fff;
        u32 dimm = (m->misc >> 16) & 0x3;
        u32 channel = (m->misc >> 18) & 0x3;
        u32 syndrome = m->misc >> 32;
        u32 errnum = find_first_bit(&error, 32);
-       int csrow;
  
-       if (m->mcgstatus & 1)
-               type = "FATAL";
-       else
-               type = "NON_FATAL";
+       if (uncorrected_error) {
+               if (ripv) {
+                       type = "FATAL";
+                       tp_event = HW_EVENT_ERR_FATAL;
+               } else {
+                       type = "NON_FATAL";
+                       tp_event = HW_EVENT_ERR_UNCORRECTED;
+               }
+       } else {
+               type = "CORRECTED";
+               tp_event = HW_EVENT_ERR_CORRECTED;
+       }
  
        switch (optypenum) {
        case 0:
                err = "unknown";
        }
  
-       /* FIXME: should convert addr into bank and rank information */
-       msg = kasprintf(GFP_ATOMIC,
-               "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
-               "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
-               type, (long long) m->addr, m->cpu, dimm, channel,
-               syndrome, core_err_cnt, (long long)m->status,
-               (long long)m->misc, optype, err);
-       debugf0("%s", msg);
-       csrow = pvt->csrow_map[channel][dimm];
+       snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);
  
-       /* Call the helper to output message */
-       if (m->mcgstatus & 1)
-               edac_mc_handle_fbd_ue(mci, csrow, 0,
-                               0 /* FIXME: should be channel here */, msg);
-       else if (!pvt->is_registered)
-               edac_mc_handle_fbd_ce(mci, csrow,
-                               0 /* FIXME: should be channel here */, msg);
-       kfree(msg);
+       /*
+        * Call the helper to output message
+        * FIXME: what to do if core_err_cnt > 1? Currently, it generates
+        * only one event
+        */
+       if (uncorrected_error || !pvt->is_registered)
+               edac_mc_handle_error(tp_event, mci,
+                                    m->addr >> PAGE_SHIFT,
+                                    m->addr & ~PAGE_MASK,
+                                    syndrome,
+                                    channel, dimm, -1,
+                                    err, msg, m);
  }
  
  /*
@@@ -2132,7 -2014,7 +2014,7 @@@ static int set_sdram_scrub_rate(struct 
  
  /*
   * get_sdram_scrub_rate               This routine convert current scrub rate value
 - *                            into byte/sec bandwidth accourding to
 + *                            into byte/sec bandwidth according to
   *                            SCRUBINTERVAL formula found in datasheet.
   */
  static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
@@@ -2252,15 -2134,19 +2134,19 @@@ static int i7core_register_mci(struct i
  {
        struct mem_ctl_info *mci;
        struct i7core_pvt *pvt;
-       int rc, channels, csrows;
-       /* Check the number of active and not disabled channels */
-       rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
-       if (unlikely(rc < 0))
-               return rc;
+       int rc;
+       struct edac_mc_layer layers[2];
  
        /* allocate a new MC control structure */
-       mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
+       layers[0].type = EDAC_MC_LAYER_CHANNEL;
+       layers[0].size = NUM_CHANS;
+       layers[0].is_virt_csrow = false;
+       layers[1].type = EDAC_MC_LAYER_SLOT;
+       layers[1].size = MAX_DIMMS;
+       layers[1].is_virt_csrow = true;
+       mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
+                           sizeof(*pvt));
        if (unlikely(!mci))
                return -ENOMEM;
  
diff --combined drivers/edac/sb_edac.c
@@@ -58,7 -58,7 +58,7 @@@ static int probed
  
  /*
   * FIXME: For now, let's order by device function, as it makes
 - * easier for driver's development proccess. This table should be
 + * easier for driver's development process. This table should be
   * moved to pci_id.h when submitted upstream
   */
  #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0      0x3cf4  /* 12.6 */
@@@ -314,8 -314,6 +314,6 @@@ struct sbridge_pvt 
        struct sbridge_info     info;
        struct sbridge_channel  channel[NUM_CHANNELS];
  
-       int                     csrow_map[NUM_CHANNELS][MAX_DIMMS];
        /* Memory type detection */
        bool                    is_mirrored, is_lockstep, is_close_pg;
  
@@@ -375,7 -373,7 +373,7 @@@ static DEFINE_PCI_DEVICE_TABLE(sbridge_
  
  
  /****************************************************************************
 -                      Anciliary status routines
 +                      Ancillary status routines
   ****************************************************************************/
  
  static inline int numrank(u32 mtr)
@@@ -487,29 -485,14 +485,14 @@@ static struct pci_dev *get_pdev_slot_fu
  }
  
  /**
-  * sbridge_get_active_channels() - gets the number of channels and csrows
+  * check_if_ecc_is_active() - Checks if ECC is active
   * bus:               Device bus
-  * @channels: Number of channels that will be returned
-  * @csrows:   Number of csrows found
-  *
-  * Since EDAC core needs to know in advance the number of available channels
-  * and csrows, in order to allocate memory for csrows/channels, it is needed
-  * to run two similar steps. At the first step, implemented on this function,
-  * it checks the number of csrows/channels present at one socket, identified
-  * by the associated PCI bus.
-  * this is used in order to properly allocate the size of mci components.
-  * Note: one csrow is one dimm.
   */
- static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
-                                     unsigned *csrows)
+ static int check_if_ecc_is_active(const u8 bus)
  {
        struct pci_dev *pdev = NULL;
-       int i, j;
        u32 mcmtr;
  
-       *channels = 0;
-       *csrows = 0;
        pdev = get_pdev_slot_func(bus, 15, 0);
        if (!pdev) {
                sbridge_printk(KERN_ERR, "Couldn't find PCI device "
                sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
                return -ENODEV;
        }
-       for (i = 0; i < NUM_CHANNELS; i++) {
-               u32 mtr;
-               /* Device 15 functions 2 - 5  */
-               pdev = get_pdev_slot_func(bus, 15, 2 + i);
-               if (!pdev) {
-                       sbridge_printk(KERN_ERR, "Couldn't find PCI device "
-                                                "%2x.%02d.%d!!!\n",
-                                                bus, 15, 2 + i);
-                       return -ENODEV;
-               }
-               (*channels)++;
-               for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
-                       pci_read_config_dword(pdev, mtr_regs[j], &mtr);
-                       debugf1("Bus#%02x channel #%d  MTR%d = %x\n", bus, i, j, mtr);
-                       if (IS_DIMM_PRESENT(mtr))
-                               (*csrows)++;
-               }
-       }
-       debugf0("Number of active channels: %d, number of active dimms: %d\n",
-               *channels, *csrows);
        return 0;
  }
  
- static int get_dimm_config(const struct mem_ctl_info *mci)
+ static int get_dimm_config(struct mem_ctl_info *mci)
  {
        struct sbridge_pvt *pvt = mci->pvt_info;
-       struct csrow_info *csr;
+       struct dimm_info *dimm;
        int i, j, banks, ranks, rows, cols, size, npages;
-       int csrow = 0;
-       unsigned long last_page = 0;
        u32 reg;
        enum edac_type mode;
        enum mem_type mtype;
                u32 mtr;
  
                for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+                                      i, j, 0);
                        pci_read_config_dword(pvt->pci_tad[i],
                                              mtr_regs[j], &mtr);
                        debugf4("Channel #%d  MTR%d = %x\n", i, j, mtr);
                                        pvt->sbridge_dev->mc, i, j,
                                        size, npages,
                                        banks, ranks, rows, cols);
-                               csr = &mci->csrows[csrow];
-                               csr->first_page = last_page;
-                               csr->last_page = last_page + npages - 1;
-                               csr->page_mask = 0UL;   /* Unused */
-                               csr->nr_pages = npages;
-                               csr->grain = 32;
-                               csr->csrow_idx = csrow;
-                               csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
-                               csr->ce_count = 0;
-                               csr->ue_count = 0;
-                               csr->mtype = mtype;
-                               csr->edac_mode = mode;
-                               csr->nr_channels = 1;
-                               csr->channels[0].chan_idx = i;
-                               csr->channels[0].ce_count = 0;
-                               pvt->csrow_map[i][j] = csrow;
-                               snprintf(csr->channels[0].label,
-                                        sizeof(csr->channels[0].label),
+                               dimm->nr_pages = npages;
+                               dimm->grain = 32;
+                               dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
+                               dimm->mtype = mtype;
+                               dimm->edac_mode = mode;
+                               snprintf(dimm->label, sizeof(dimm->label),
                                         "CPU_SrcID#%u_Channel#%u_DIMM#%u",
                                         pvt->sbridge_dev->source_id, i, j);
-                               last_page += npages;
-                               csrow++;
                        }
                }
        }
@@@ -844,11 -788,10 +788,10 @@@ static int get_memory_error_data(struc
                                 u8 *socket,
                                 long *channel_mask,
                                 u8 *rank,
-                                char *area_type)
+                                char **area_type, char *msg)
  {
        struct mem_ctl_info     *new_mci;
        struct sbridge_pvt *pvt = mci->pvt_info;
-       char                    msg[256];
        int                     n_rir, n_sads, n_tads, sad_way, sck_xch;
        int                     sad_interl, idx, base_ch;
        int                     interleave_mode;
         */
        if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
                sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
        if (addr >= (u64)pvt->tohm) {
                sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
  
                limit = SAD_LIMIT(reg);
                if (limit <= prv) {
                        sprintf(msg, "Can't discover the memory socket");
-                       edac_mc_handle_ce_no_info(mci, msg);
                        return -EINVAL;
                }
                if  (addr <= limit)
        }
        if (n_sads == MAX_SAD) {
                sprintf(msg, "Can't discover the memory socket");
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
-       area_type = get_dram_attr(reg);
+       *area_type = get_dram_attr(reg);
        interleave_mode = INTERLEAVE_MODE(reg);
  
        pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
                break;
        default:
                sprintf(msg, "Can't discover socket interleave");
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
        *socket = sad_interleave[idx];
        if (!new_mci) {
                sprintf(msg, "Struct for socket #%u wasn't initialized",
                        *socket);
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
        mci = new_mci;
                limit = TAD_LIMIT(reg);
                if (limit <= prv) {
                        sprintf(msg, "Can't discover the memory channel");
-                       edac_mc_handle_ce_no_info(mci, msg);
                        return -EINVAL;
                }
                if  (addr <= limit)
                break;
        default:
                sprintf(msg, "Can't discover the TAD target");
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
        *channel_mask = 1 << base_ch;
                        break;
                default:
                        sprintf(msg, "Invalid mirror set. Can't decode addr");
-                       edac_mc_handle_ce_no_info(mci, msg);
                        return -EINVAL;
                }
        } else
        if (offset > addr) {
                sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
                        offset, addr);
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
        addr -= offset;
        if (n_rir == MAX_RIR_RANGES) {
                sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
                        ch_addr);
-               edac_mc_handle_ce_no_info(mci, msg);
                return -EINVAL;
        }
        rir_way = RIR_WAY(reg);
@@@ -1409,7 -1341,8 +1341,8 @@@ static void sbridge_mce_output_error(st
  {
        struct mem_ctl_info *new_mci;
        struct sbridge_pvt *pvt = mci->pvt_info;
-       char *type, *optype, *msg, *recoverable_msg;
+       enum hw_event_mc_err_type tp_event;
+       char *type, *optype, msg[256];
        bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
        bool overflow = GET_BITFIELD(m->status, 62, 62);
        bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
        u32 optypenum = GET_BITFIELD(m->status, 4, 6);
        long channel_mask, first_channel;
        u8  rank, socket;
-       int csrow, rc, dimm;
-       char *area_type = "Unknown";
-       if (ripv)
-               type = "NON_FATAL";
-       else
-               type = "FATAL";
+       int rc, dimm;
+       char *area_type = NULL;
+       if (uncorrected_error) {
+               if (ripv) {
+                       type = "FATAL";
+                       tp_event = HW_EVENT_ERR_FATAL;
+               } else {
+                       type = "NON_FATAL";
+                       tp_event = HW_EVENT_ERR_UNCORRECTED;
+               }
+       } else {
+               type = "CORRECTED";
+               tp_event = HW_EVENT_ERR_CORRECTED;
+       }
  
        /*
 -       * According with Table 15-9 of the Intel Archictecture spec vol 3A,
 +       * According with Table 15-9 of the Intel Architecture spec vol 3A,
         * memory errors should fit in this mask:
         *      000f 0000 1mmm cccc (binary)
         * where:
        } else {
                switch (optypenum) {
                case 0:
-                       optype = "generic undef request";
+                       optype = "generic undef request error";
                        break;
                case 1:
-                       optype = "memory read";
+                       optype = "memory read error";
                        break;
                case 2:
-                       optype = "memory write";
+                       optype = "memory write error";
                        break;
                case 3:
-                       optype = "addr/cmd";
+                       optype = "addr/cmd error";
                        break;
                case 4:
-                       optype = "memory scrubbing";
+                       optype = "memory scrubbing error";
                        break;
                default:
                        optype = "reserved";
        }
  
        rc = get_memory_error_data(mci, m->addr, &socket,
-                                  &channel_mask, &rank, area_type);
+                                  &channel_mask, &rank, &area_type, msg);
        if (rc < 0)
-               return;
+               goto err_parsing;
        new_mci = get_mci_for_node_id(socket);
        if (!new_mci) {
-               edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!");
-               return;
+               strcpy(msg, "Error: socket got corrupted!");
+               goto err_parsing;
        }
        mci = new_mci;
        pvt = mci->pvt_info;
        else
                dimm = 2;
  
-       csrow = pvt->csrow_map[first_channel][dimm];
-       if (uncorrected_error && recoverable)
-               recoverable_msg = " recoverable";
-       else
-               recoverable_msg = "";
  
        /*
-        * FIXME: What should we do with "channel" information on mcelog?
-        * Probably, we can just discard it, as the channel information
-        * comes from the get_memory_error_data() address decoding
+        * FIXME: On some memory configurations (mirror, lockstep), the
+        * Memory Controller can't point the error to a single DIMM. The
+        * EDAC core should be handling the channel mask, in order to point
+        * to the group of DIMMs where the error may be happening.
         */
-       msg = kasprintf(GFP_ATOMIC,
-                       "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), "
-                       "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n",
-                       core_err_cnt,
-                       area_type,
-                       optype,
-                       type,
-                       recoverable_msg,
-                       overflow ? "OVERFLOW" : "",
-                       m->cpu,
-                       mscod, errcode,
-                       channel,                /* 1111b means not specified */
-                       (long long) m->addr,
-                       socket,
-                       first_channel,          /* This is the real channel on SB */
-                       channel_mask,
-                       rank);
+       snprintf(msg, sizeof(msg),
+                "count:%d%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
+                core_err_cnt,
+                overflow ? " OVERFLOW" : "",
+                (uncorrected_error && recoverable) ? " recoverable" : "",
+                area_type,
+                mscod, errcode,
+                socket,
+                channel_mask,
+                rank);
  
        debugf0("%s", msg);
  
+       /* FIXME: need support for channel mask */
        /* Call the helper to output message */
-       if (uncorrected_error)
-               edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg);
-       else
-               edac_mc_handle_fbd_ce(mci, csrow, 0, msg);
+       edac_mc_handle_error(tp_event, mci,
+                            m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+                            channel, dimm, -1,
+                            optype, msg, m);
+       return;
+ err_parsing:
+       edac_mc_handle_error(tp_event, mci, 0, 0, 0,
+                            -1, -1, -1,
+                            msg, "", m);
  
-       kfree(msg);
  }
  
  /*
@@@ -1683,16 -1618,25 +1618,25 @@@ static void sbridge_unregister_mci(stru
  static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
  {
        struct mem_ctl_info *mci;
+       struct edac_mc_layer layers[2];
        struct sbridge_pvt *pvt;
-       int rc, channels, csrows;
+       int rc;
  
        /* Check the number of active and not disabled channels */
-       rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows);
+       rc = check_if_ecc_is_active(sbridge_dev->bus);
        if (unlikely(rc < 0))
                return rc;
  
        /* allocate a new MC control structure */
-       mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc);
+       layers[0].type = EDAC_MC_LAYER_CHANNEL;
+       layers[0].size = NUM_CHANNELS;
+       layers[0].is_virt_csrow = false;
+       layers[1].type = EDAC_MC_LAYER_SLOT;
+       layers[1].size = MAX_DIMMS;
+       layers[1].is_virt_csrow = true;
+       mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
+                           sizeof(*pvt));
        if (unlikely(!mci))
                return -ENOMEM;