#define EDAC_MC_LABEL_LEN 31
#define MC_PROC_NAME_MAX_LEN 7
-/* memory devices */
+/**
+ * enum dev_type - describe the type of memory DRAM chips used at the stick
+ * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it
+ * @DEV_X1: 1 bit for data
+ * @DEV_X2: 2 bits for data
+ * @DEV_X4: 4 bits for data
+ * @DEV_X8: 8 bits for data
+ * @DEV_X16: 16 bits for data
+ * @DEV_X32: 32 bits for data
+ * @DEV_X64: 64 bits for data
+ *
+ * Typical values are x4 and x8.
+ */
enum dev_type {
DEV_UNKNOWN = 0,
DEV_X1,
#define DEV_FLAG_X64 BIT(DEV_X64)
/**
+ * enum hw_event_mc_err_type - type of the detected error
+ *
+ * @HW_EVENT_ERR_CORRECTED: Corrected Error - Indicates that an ECC
+ * corrected error was detected
+ * @HW_EVENT_ERR_UNCORRECTED: Uncorrected Error - Indicates an error that
+ * can't be corrected by ECC, but it is not
+ * fatal (maybe it is on an unused memory area,
+ * or the memory controller could recover from
+ * it for example, by re-trying the operation).
+ * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not
+ * be recovered.
+ */
+enum hw_event_mc_err_type {
+ HW_EVENT_ERR_CORRECTED,
+ HW_EVENT_ERR_UNCORRECTED,
+ HW_EVENT_ERR_FATAL,
+};
+
+/**
* enum mem_type - memory types. For a more detailed reference, please see
* http://en.wikipedia.org/wiki/DRAM
*
#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
-/* chipset Error Detection and Correction capabilities and mode */
+/**
+ * enum edac-type - Error Detection and Correction capabilities and mode
+ * @EDAC_UNKNOWN: Unknown if ECC is available
+ * @EDAC_NONE: Doesn't support ECC
+ * @EDAC_RESERVED: Reserved ECC type
+ * @EDAC_PARITY: Detects parity errors
+ * @EDAC_EC: Error Checking - no correction
+ * @EDAC_SECDED: Single bit error correction, Double detection
+ * @EDAC_S2ECD2ED: Chipkill x2 devices - do these exist?
+ * @EDAC_S4ECD4ED: Chipkill x4 devices
+ * @EDAC_S8ECD8ED: Chipkill x8 devices
+ * @EDAC_S16ECD16ED: Chipkill x16 devices
+ */
enum edac_type {
- EDAC_UNKNOWN = 0, /* Unknown if ECC is available */
- EDAC_NONE, /* Doesn't support ECC */
- EDAC_RESERVED, /* Reserved ECC type */
- EDAC_PARITY, /* Detects parity errors */
- EDAC_EC, /* Error Checking - no correction */
- EDAC_SECDED, /* Single bit error correction, Double detection */
- EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */
- EDAC_S4ECD4ED, /* Chipkill x4 devices */
- EDAC_S8ECD8ED, /* Chipkill x8 devices */
- EDAC_S16ECD16ED, /* Chipkill x16 devices */
+ EDAC_UNKNOWN = 0,
+ EDAC_NONE,
+ EDAC_RESERVED,
+ EDAC_PARITY,
+ EDAC_EC,
+ EDAC_SECDED,
+ EDAC_S2ECD2ED,
+ EDAC_S4ECD4ED,
+ EDAC_S8ECD8ED,
+ EDAC_S16ECD16ED,
};
#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
-/* scrubbing capabilities */
+/**
+ * enum scrub_type - scrubbing capabilities
+ * @SCRUB_UNKNOWN Unknown if scrubber is available
+ * @SCRUB_NONE: No scrubber
+ * @SCRUB_SW_PROG: SW progressive (sequential) scrubbing
+ * @SCRUB_SW_SRC: Software scrub only errors
+ * @SCRUB_SW_PROG_SRC: Progressive software scrub from an error
+ * @SCRUB_SW_TUNABLE: Software scrub frequency is tunable
+ * @SCRUB_HW_PROG: HW progressive (sequential) scrubbing
+ * @SCRUB_HW_SRC: Hardware scrub only errors
+ * @SCRUB_HW_PROG_SRC: Progressive hardware scrub from an error
+ * SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable
+ */
enum scrub_type {
- SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */
- SCRUB_NONE, /* No scrubber */
- SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */
- SCRUB_SW_SRC, /* Software scrub only errors */
- SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */
- SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */
- SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */
- SCRUB_HW_SRC, /* Hardware scrub only errors */
- SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */
- SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */
+ SCRUB_UNKNOWN = 0,
+ SCRUB_NONE,
+ SCRUB_SW_PROG,
+ SCRUB_SW_SRC,
+ SCRUB_SW_PROG_SRC,
+ SCRUB_SW_TUNABLE,
+ SCRUB_HW_PROG,
+ SCRUB_HW_SRC,
+ SCRUB_HW_PROG_SRC,
+ SCRUB_HW_TUNABLE
};
#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
*/
/**
+ * enum edac_mc_layer - memory controller hierarchy layer
+ *
+ * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch"
+ * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel"
+ * @EDAC_MC_LAYER_SLOT: memory layer is named "slot"
+ * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select"
+ *
+ * This enum is used by the drivers to tell edac_mc_sysfs what name should
+ * be used when describing a memory stick location.
+ */
+enum edac_mc_layer_type {
+ EDAC_MC_LAYER_BRANCH,
+ EDAC_MC_LAYER_CHANNEL,
+ EDAC_MC_LAYER_SLOT,
+ EDAC_MC_LAYER_CHIP_SELECT,
+};
+
+/**
+ * struct edac_mc_layer - describes the memory controller hierarchy
+ * @layer: layer type
+ * @size: number of components per layer. For example,
+ * if the channel layer has two channels, size = 2
+ * @is_virt_csrow: This layer is part of the "csrow" when old API
+ * compatibility mode is enabled. Otherwise, it is
+ * a channel
+ */
+struct edac_mc_layer {
+ enum edac_mc_layer_type type;
+ unsigned size;
+ bool is_virt_csrow;
+};
+
+/*
+ * Maximum number of layers used by the memory controller to uniquely
+ * identify a single memory stick.
+ * NOTE: Changing this constant requires not only to change the constant
+ * below, but also to change the existing code at the core, as there are
+ * some code there that are optimized for 3 layers.
+ */
+#define EDAC_MAX_LAYERS 3
+
+/**
+ * EDAC_DIMM_PTR - Macro responsible to find a pointer inside a pointer array
+ * for the element given by [layer0,layer1,layer2] position
+ *
+ * @layers: a struct edac_mc_layer array, describing how many elements
+ * were allocated for each layer
+ * @var: name of the var where we want to get the pointer
+ * (like mci->dimms)
+ * @n_layers: Number of layers at the @layers array
+ * @layer0: layer0 position
+ * @layer1: layer1 position. Unused if n_layers < 2
+ * @layer2: layer2 position. Unused if n_layers < 3
+ *
+ * For 1 layer, this macro returns &var[layer0]
+ * For 2 layers, this macro is similar to allocate a bi-dimensional array
+ * and to return "&var[layer0][layer1]"
+ * For 3 layers, this macro is similar to allocate a tri-dimensional array
+ * and to return "&var[layer0][layer1][layer2]"
+ *
+ * A loop could be used here to make it more generic, but, as we only have
+ * 3 layers, this is a little faster.
+ * By design, layers can never be 0 or more than 3. If that ever happens,
+ * a NULL is returned, causing an OOPS during the memory allocation routine,
+ * with would point to the developer that he's doing something wrong.
+ */
+#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \
+ typeof(var) __p; \
+ if ((nlayers) == 1) \
+ __p = &var[layer0]; \
+ else if ((nlayers) == 2) \
+ __p = &var[(layer1) + ((layers[1]).size * (layer0))]; \
+ else if ((nlayers) == 3) \
+ __p = &var[(layer2) + ((layers[2]).size * ((layer1) + \
+ ((layers[1]).size * (layer0))))]; \
+ else \
+ __p = NULL; \
+ __p; \
+})
+
+
+/* FIXME: add the proper per-location error counts */
+struct dimm_info {
+ char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
+
+ /* Memory location data */
+ unsigned location[EDAC_MAX_LAYERS];
+
+ struct mem_ctl_info *mci; /* the parent */
+
+ u32 grain; /* granularity of reported error in bytes */
+ enum dev_type dtype; /* memory device type */
+ enum mem_type mtype; /* memory dimm type */
+ enum edac_type edac_mode; /* EDAC mode for this dimm */
+
+ u32 nr_pages; /* number of pages on this dimm */
+
+ unsigned csrow, cschannel; /* Points to the old API data */
+};
+
+/**
* struct rank_info - contains the information for one DIMM rank
*
* @chan_idx: channel number where the rank is (typically, 0 or 1)
* @ce_count: number of correctable errors for this rank
- * @label: DIMM label. Different ranks for the same DIMM should be
- * filled, on userspace, with the same label.
- * FIXME: The core currently won't enforce it.
* @csrow: A pointer to the chip select row structure (the parent
* structure). The location of the rank is given by
* the (csrow->csrow_idx, chan_idx) vector.
+ * @dimm: A pointer to the DIMM structure, where the DIMM label
+ * information is stored.
+ *
+ * FIXME: Currently, the EDAC core model will assume one DIMM per rank.
+ * This is a bad assumption, but it makes this patch easier. Later
+ * patches in this series will fix this issue.
*/
struct rank_info {
int chan_idx;
- u32 ce_count;
- char label[EDAC_MC_LABEL_LEN + 1];
- struct csrow_info *csrow; /* the parent */
+ struct csrow_info *csrow;
+ struct dimm_info *dimm;
+
+ u32 ce_count; /* Correctable Errors for this csrow */
};
struct csrow_info {
- unsigned long first_page; /* first page number in dimm */
- unsigned long last_page; /* last page number in dimm */
+ /* Used only by edac_mc_find_csrow_by_page() */
+ unsigned long first_page; /* first page number in csrow */
+ unsigned long last_page; /* last page number in csrow */
unsigned long page_mask; /* used for interleaving -
- * 0UL for non intlv
- */
- u32 nr_pages; /* number of pages in csrow */
- u32 grain; /* granularity of reported error in bytes */
- int csrow_idx; /* the chip-select row */
- enum dev_type dtype; /* memory device type */
+ * 0UL for non intlv */
+
+ int csrow_idx; /* the chip-select row */
+
u32 ue_count; /* Uncorrectable Errors for this csrow */
u32 ce_count; /* Correctable Errors for this csrow */
- enum mem_type mtype; /* memory csrow type */
- enum edac_type edac_mode; /* EDAC mode for this csrow */
+
struct mem_ctl_info *mci; /* the parent */
struct kobject kobj; /* sysfs kobject for this csrow */
unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
unsigned long page);
int mc_idx;
- int nr_csrows;
struct csrow_info *csrows;
+ unsigned nr_csrows, num_cschannel;
+
+ /* Memory Controller hierarchy */
+ unsigned n_layers;
+ struct edac_mc_layer *layers;
+ bool mem_is_per_rank;
+
+ /*
+ * DIMM info. Will eventually remove the entire csrows_info some day
+ */
+ unsigned tot_dimms;
+ struct dimm_info *dimms;
+
/*
* FIXME - what about controllers on other busses? - IDs must be
* unique. dev pointer should be sufficiently unique, but
* BUS:SLOT.FUNC numbers may not be unique.
*/
- struct device *dev;
+ struct device *pdev;
const char *mod_name;
const char *mod_ver;
const char *ctl_name;
const char *dev_name;
char proc_name[MC_PROC_NAME_MAX_LEN + 1];
void *pvt_info;
- u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
- u32 ce_noinfo_count; /* Correctable Errors w/o info */
- u32 ue_count; /* Total Uncorrectable Errors for this MC */
- u32 ce_count; /* Total Correctable Errors for this MC */
unsigned long start_time; /* mci load start time (in jiffies) */
+ /*
+ * drivers shouldn't access those fields directly, as the core
+ * already handles that.
+ */
+ u32 ce_noinfo_count, ue_noinfo_count;
+ u32 ue_mc, ce_mc;
+ u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
+
struct completion complete;
/* edac sysfs device control */
* by the low level driver.
*
* Set by the low level driver to provide attributes at the
- * controller level, same level as 'ue_count' and 'ce_count' above.
+ * controller level.
* An array of structures, NULL terminated
*
* If attributes are desired, then set to array of attributes