Subject: EDAC From: Benjamin Herrenschmidt Haven't had a chance to test it... can't error inject and I just finished backporting it. Apply edac-add-xdr.diff then edac-add-cell-ecc.diff It's not suitable for upstream, I'm waiting for the EDAC folks to submit the updated core first. Signed-off-by: Arnd Bergmann --- --=-N2b/mdp0UWZQBJVXOLrV Content-Disposition: attachment; filename=edac-add-xdr.diff Content-Type: text/x-patch; name=edac-add-xdr.diff; charset=UTF-8 Content-Transfer-Encoding: 7bit Index: linux-2.6/drivers/edac/edac_mc.c =================================================================== --- linux-2.6.orig/drivers/edac/edac_mc.c +++ linux-2.6/drivers/edac/edac_mc.c @@ -78,7 +78,8 @@ static const char *mem_types[] = { [MEM_RDR] = "Registered-SDR", [MEM_DDR] = "Unbuffered-DDR", [MEM_RDDR] = "Registered-DDR", - [MEM_RMBS] = "RMBS" + [MEM_RMBS] = "RMBS", + [MEM_XDR] = "XDR", }; static const char *dev_types[] = { Index: linux-2.6/drivers/edac/edac_mc.h =================================================================== --- linux-2.6.orig/drivers/edac/edac_mc.h +++ linux-2.6/drivers/edac/edac_mc.h @@ -84,11 +84,15 @@ extern int edac_debug_level; #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ PCI_DEVICE_ID_ ## vend ## _ ## dev +#if 0 /* That hack is dead in newer EDAC stack */ #if defined(CONFIG_X86) && defined(CONFIG_PCI) #define dev_name(dev) pci_name(to_pci_dev(dev)) #else #define dev_name(dev) to_platform_device(dev)->name #endif +#else +#define dev_name(dev) ((dev)->bus_id) +#endif /* memory devices */ enum dev_type { @@ -126,6 +130,7 @@ enum mem_type { MEM_RMBS, /* Rambus DRAM */ MEM_DDR2, /* DDR2 RAM */ MEM_FB_DDR2, /* fully buffered DDR2 */ + MEM_XDR, /* Rambus XDR for Cell BE */ }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -141,6 +146,7 @@ enum mem_type { #define MEM_FLAG_RMBS BIT(MEM_RMBS) #define MEM_FLAG_DDR2 BIT(MEM_DDR2) #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) +#define MEM_FLAG_XDR BIT(MEM_XDR) /* chipset Error Detection and Correction capabilities and mode */ enum edac_type { Index: linux-2.6/arch/powerpc/platforms/cell/cbe_regs.h =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/cbe_regs.h +++ linux-2.6/arch/powerpc/platforms/cell/cbe_regs.h @@ -240,16 +240,60 @@ struct cbe_mic_tm_regs { u64 slow_fast_timer_0; /* 0x0090 */ u64 slow_next_timer_0; /* 0x0098 */ - u8 pad_0x00a0_0x01c0[0x01c0 - 0x0a0]; /* 0x00a0 */ + u8 pad_0x00a0_0x00f8[0x00f8 - 0x00a0]; /* 0x00a0 */ + u64 mic_df_ecc_address_0; /* 0x00f8 */ + + u8 pad_0x0100_0x01b8[0x01b8 - 0x0100]; /* 0x0100 */ + u64 mic_df_ecc_address_1; /* 0x01b8 */ u64 mic_ctl_cnfg_1; /* 0x01c0 */ #define CBE_MIC_DISABLE_PWR_SAV_1 0x8000000000000000LL + u64 pad_0x01c8; /* 0x01c8 */ u64 slow_fast_timer_1; /* 0x01d0 */ u64 slow_next_timer_1; /* 0x01d8 */ - u8 pad_0x01e0_0x1000[0x1000 - 0x01e0]; /* 0x01e0 */ + u8 pad_0x01e0_0x0208[0x0208 - 0x01e0]; /* 0x01e0 */ + u64 mic_exc; /* 0x0208 */ +#define CBE_MIC_EXC_BLOCK_SCRUB 0x0800000000000000ULL +#define CBE_MIC_EXC_FAST_SCRUB 0x0100000000000000ULL + + u64 mic_mnt_cfg; /* 0x0210 */ +#define CBE_MIC_MNT_CFG_CHAN_0_POP 0x0002000000000000ULL +#define CBE_MIC_MNT_CFG_CHAN_1_POP 0x0004000000000000ULL + + u64 mic_df_config; /* 0x0218 */ +#define CBE_MIC_ECC_DISABLE_0 0x4000000000000000ULL +#define CBE_MIC_ECC_REP_SINGLE_0 0x2000000000000000ULL +#define CBE_MIC_ECC_DISABLE_1 0x0080000000000000ULL +#define CBE_MIC_ECC_REP_SINGLE_1 0x0040000000000000ULL + + u8 pad_0x0220_0x0230[0x0230 - 0x0220]; /* 0x0220 */ + u64 mic_fir; /* 0x0230 */ +#define CBE_MIC_FIR_ECC_SINGLE_0_ERR 0x0200000000000000ULL +#define CBE_MIC_FIR_ECC_MULTI_0_ERR 0x0100000000000000ULL +#define CBE_MIC_FIR_ECC_SINGLE_1_ERR 0x0080000000000000ULL +#define CBE_MIC_FIR_ECC_MULTI_1_ERR 0x0040000000000000ULL +#define CBE_MIC_FIR_ECC_ERR_MASK 0xffff000000000000ULL +#define CBE_MIC_FIR_ECC_SINGLE_0_CTE 0x0000020000000000ULL +#define CBE_MIC_FIR_ECC_MULTI_0_CTE 0x0000010000000000ULL +#define CBE_MIC_FIR_ECC_SINGLE_1_CTE 0x0000008000000000ULL +#define CBE_MIC_FIR_ECC_MULTI_1_CTE 0x0000004000000000ULL +#define CBE_MIC_FIR_ECC_CTE_MASK 0x0000ffff00000000ULL +#define CBE_MIC_FIR_ECC_SINGLE_0_RESET 0x0000000002000000ULL +#define CBE_MIC_FIR_ECC_MULTI_0_RESET 0x0000000001000000ULL +#define CBE_MIC_FIR_ECC_SINGLE_1_RESET 0x0000000000800000ULL +#define CBE_MIC_FIR_ECC_MULTI_1_RESET 0x0000000000400000ULL +#define CBE_MIC_FIR_ECC_RESET_MASK 0x00000000ffff0000ULL +#define CBE_MIC_FIR_ECC_SINGLE_0_SET 0x0000000000000200ULL +#define CBE_MIC_FIR_ECC_MULTI_0_SET 0x0000000000000100ULL +#define CBE_MIC_FIR_ECC_SINGLE_1_SET 0x0000000000000080ULL +#define CBE_MIC_FIR_ECC_MULTI_1_SET 0x0000000000000040ULL +#define CBE_MIC_FIR_ECC_SET_MASK 0x000000000000ffffULL + u64 mic_fir_debug; /* 0x0238 */ + + u8 pad_0x0240_0x1000[0x1000 - 0x0240]; /* 0x0240 */ }; extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np); Index: linux-2.6/arch/powerpc/platforms/cell/setup.c =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/setup.c +++ linux-2.6/arch/powerpc/platforms/cell/setup.c @@ -83,12 +83,22 @@ static void cell_progress(char *s, unsig static int __init cell_publish_devices(void) { + int node; + if (!machine_is(cell)) return 0; /* Publish OF platform devices for southbridge IOs */ of_platform_bus_probe(NULL, NULL, NULL); + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); + } return 0; } device_initcall(cell_publish_devices); Index: linux-2.6/drivers/edac/Kconfig =================================================================== --- linux-2.6.orig/drivers/edac/Kconfig +++ linux-2.6/drivers/edac/Kconfig @@ -11,7 +11,7 @@ menu 'EDAC - error detection and reporti config EDAC tristate "EDAC core system error reporting (EXPERIMENTAL)" - depends on X86 && EXPERIMENTAL + depends on EXPERIMENTAL help EDAC is designed to report errors in the core system. These are low-level errors that are reported in the CPU or @@ -98,6 +98,13 @@ config EDAC_R82600 Support for error detection and correction on the Radisys 82600 embedded chipset. +config EDAC_CELL + tristate "Cell Broadband Engine memory controller" + depends on EDAC_MM_EDAC && PPC_CELL_NATIVE + help + Support for error detection and correction on the + Cell Broadband Engine internal memory controller + on platform without a hypervisor choice prompt "Error detecting method" depends on EDAC Index: linux-2.6/drivers/edac/Makefile =================================================================== --- linux-2.6.orig/drivers/edac/Makefile +++ linux-2.6/drivers/edac/Makefile @@ -15,4 +15,4 @@ obj-$(CONFIG_EDAC_E752X) += e752x_edac. obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o - +obj-$(CONFIG_EDAC_CELL) += cell-edac.o Index: linux-2.6/drivers/edac/cell-edac.c =================================================================== --- /dev/null +++ linux-2.6/drivers/edac/cell-edac.c @@ -0,0 +1,259 @@ +/* + * Cell MIC driver for ECC counting + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * + * + * This file may be distributed under the terms of the + * GNU General Public License. + */ +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include <../arch/powerpc/platforms/cell/cbe_regs.h> + +#include "edac_mc.h" + +struct cell_edac_priv +{ + struct cbe_mic_tm_regs __iomem *regs; + int node; + int chanmask; +#ifdef DEBUG + u64 prev_fir; +#endif +}; + +static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar) +{ + struct cell_edac_priv *priv = mci->pvt_info; + struct csrow_info *csrow = &mci->csrows[0]; + unsigned long address, pfn, offset; + + dev_dbg(mci->dev, "ECC CE err on node %d, channel %d, ar = 0x%016lx\n", + priv->node, chan, ar); + + /* Address decoding is likely a bit bogus, to dbl check */ + address = (ar & 0xffffffffe0000000ul) >> 29; + if (priv->chanmask == 0x3) + address = (address << 1) | chan; + pfn = address >> PAGE_SHIFT; + offset = address & ~PAGE_MASK; + + /* TODO: Decoding of the error addresss */ + edac_mc_handle_ce(mci, csrow->first_page + pfn, offset, + 0, 0, chan, ""); +} + +static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar) +{ + struct cell_edac_priv *priv = mci->pvt_info; + struct csrow_info *csrow = &mci->csrows[0]; + unsigned long address, pfn, offset; + + dev_dbg(mci->dev, "ECC UE err on node %d, channel %d, ar = 0x%016lx\n", + priv->node, chan, ar); + + /* Address decoding is likely a bit bogus, to dbl check */ + address = (ar & 0xffffffffe0000000ul) >> 29; + if (priv->chanmask == 0x3) + address = (address << 1) | chan; + pfn = address >> PAGE_SHIFT; + offset = address & ~PAGE_MASK; + + /* TODO: Decoding of the error addresss */ + edac_mc_handle_ue(mci, csrow->first_page + pfn, offset, 0, ""); +} + +static void cell_edac_check(struct mem_ctl_info *mci) +{ + struct cell_edac_priv *priv = mci->pvt_info; + u64 fir, addreg, clear = 0; + + fir = in_be64(&priv->regs->mic_fir); +#ifdef DEBUG + if (fir != priv->prev_fir) { + dev_dbg(mci->dev, "fir change : 0x%016lx\n", fir); + priv->prev_fir = fir; + } +#endif + if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_SINGLE_0_ERR)) { + addreg = in_be64(&priv->regs->mic_df_ecc_address_0); + clear |= CBE_MIC_FIR_ECC_SINGLE_0_RESET; + cell_edac_count_ce(mci, 0, addreg); + } + if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_SINGLE_1_ERR)) { + addreg = in_be64(&priv->regs->mic_df_ecc_address_1); + clear |= CBE_MIC_FIR_ECC_SINGLE_1_RESET; + cell_edac_count_ce(mci, 1, addreg); + } + if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_MULTI_0_ERR)) { + addreg = in_be64(&priv->regs->mic_df_ecc_address_0); + clear |= CBE_MIC_FIR_ECC_MULTI_0_RESET; + cell_edac_count_ue(mci, 0, addreg); + } + if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_MULTI_1_ERR)) { + addreg = in_be64(&priv->regs->mic_df_ecc_address_1); + clear |= CBE_MIC_FIR_ECC_MULTI_1_RESET; + cell_edac_count_ue(mci, 1, addreg); + } + + /* The procedure for clearing FIR bits is a bit ... weird */ + if (clear) { + fir &= ~(CBE_MIC_FIR_ECC_ERR_MASK | CBE_MIC_FIR_ECC_SET_MASK); + fir |= CBE_MIC_FIR_ECC_RESET_MASK; + fir &= ~clear; + out_be64(&priv->regs->mic_fir, fir); + (void)in_be64(&priv->regs->mic_fir); + mb(); +#ifdef DEBUG + fir = in_be64(&priv->regs->mic_fir); + dev_dbg(mci->dev, "fir clear : 0x%016lx\n", fir); +#endif + } +} + +static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci) +{ + struct csrow_info *csrow = &mci->csrows[0]; + struct cell_edac_priv *priv = mci->pvt_info; + struct device_node *np; + + for (np = NULL; + (np = of_find_node_by_name(np, "memory")) != NULL;) { + struct resource r; + + /* We "know" that the Cell firmware only creates one entry + * in the "memory" nodes. If that changes, this code will + * need to be adapted. + */ + if (of_address_to_resource(np, 0, &r)) + continue; + if (of_node_to_nid(np) != priv->node) + continue; + csrow->first_page = r.start >> PAGE_SHIFT; + csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + csrow->nr_pages - 1; + csrow->mtype = MEM_XDR; + csrow->edac_mode = EDAC_FLAG_EC | EDAC_FLAG_SECDED; + dev_dbg(mci->dev, + "Initialized on node %d, chanmask=0x%x," + " first_page=0x%lx, nr_pages=0x%x\n", + priv->node, priv->chanmask, + csrow->first_page, csrow->nr_pages); + break; + } +} + +static int __devinit cell_edac_probe(struct platform_device *pdev) +{ + struct cbe_mic_tm_regs __iomem *regs; + struct mem_ctl_info *mci; + struct cell_edac_priv *priv; + u64 reg; + int rc, chanmask; + + regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id)); + if (regs == NULL) + return -ENODEV; + + /* Get channel population */ + reg = in_be64(®s->mic_mnt_cfg); + dev_dbg(&pdev->dev, "MIC_MNT_CFG = 0x%016lx\n", reg); + chanmask = 0; + if (reg & CBE_MIC_MNT_CFG_CHAN_0_POP) + chanmask |= 0x1; + if (reg & CBE_MIC_MNT_CFG_CHAN_1_POP) + chanmask |= 0x2; + if (chanmask == 0) { + dev_warn(&pdev->dev, + "Yuck ! No channel populated ? Aborting !\n"); + return -ENODEV; + } + dev_dbg(&pdev->dev, "Initial FIR = 0x%016lx\n", + in_be64(®s->mic_fir)); + + /* Allocate & init EDAC MC data structure */ + mci = edac_mc_alloc(sizeof(struct cell_edac_priv), 1, + chanmask == 3 ? 2 : 1); + if (mci == NULL) + return -ENOMEM; + priv = mci->pvt_info; + priv->regs = regs; + priv->node = pdev->id; + priv->chanmask = chanmask; + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_XDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED; + mci->mod_name = "cell-edac"; + mci->ctl_name = "MIC"; + mci->edac_check = cell_edac_check; + cell_edac_init_csrows(mci); + + /* Register with EDAC core */ + rc = edac_mc_add_mc(mci, priv->node); + if (rc) { + dev_err(&pdev->dev, "failed to register with EDAC core\n"); + edac_mc_free(mci); + return rc; + } + + return 0; +} + +static int __devexit cell_edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci; + + mci = edac_mc_del_mc(&pdev->dev); + if (mci) + edac_mc_free(mci); + return 0; +} + +static struct platform_driver cell_edac_driver = +{ + .driver = { + .name = "cbe-mic", + .owner = THIS_MODULE, + }, + .probe = cell_edac_probe, + .remove = cell_edac_remove, +}; + +static int __init cell_edac_init(void) +{ + /* Sanity check registers data structure */ + BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, + mic_df_ecc_address_0) != 0xf8); + BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, + mic_df_ecc_address_1) != 0x1b8); + BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, + mic_df_config) != 0x218); + BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, + mic_fir) != 0x230); + BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, + mic_mnt_cfg) != 0x210); + BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs, + mic_exc) != 0x208); + + return platform_driver_register(&cell_edac_driver); +} + +static void __exit cell_edac_exit(void) +{ + platform_driver_unregister(&cell_edac_driver); +} + +module_init(cell_edac_init); +module_exit(cell_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("ECC counting for Cell MIC");