From: Andrew Morton - Don't open-code on_each_cpu(). - uninline do_wrmsr(). It is slow, and large. - ditto do_rdmsr() - Drastically simplify do_rdmsr() and do_wrmsr() - on_each_cpu will do the right thing. - Remove zillions of unneeded casts of void* - The driver does lots of: if ((csrow = sys_addr_to_csrow(log_mci, error_address)) < 0) Please don't. Kernel-style prefers csrow = sys_addr_to_csrow(log_mci, error_address); if (csrow < 0) But I couldn't be assed fixing them all. The driver will need run-time testing again after these changes, sorry. With CONFIG_PREEMPT and CONFIG_DEBUG_PREEMPT so we pick up any smp_processor_id() bugs. On UP and SMP. Cc: Doug Thompson Cc: Alan Cox Cc: Andi Kleen Signed-off-by: Andrew Morton --- drivers/edac/k8_edac.c | 131 +++++++++++---------------------------- 1 file changed, 37 insertions(+), 94 deletions(-) diff -puN drivers/edac/k8_edac.c~edac-new-opteron-athlon64-memory-controller-driver-tidy drivers/edac/k8_edac.c --- a/drivers/edac/k8_edac.c~edac-new-opteron-athlon64-memory-controller-driver-tidy +++ a/drivers/edac/k8_edac.c @@ -360,10 +360,7 @@ static void build_node_revision_table(vo if (initialized) return; - preempt_disable(); - smp_call_function(store_node_revision, NULL, 1, 1); - store_node_revision(NULL); - preempt_enable(); + on_each_cpu(store_node_revision, NULL, 1, 1); initialized = 1; } @@ -433,8 +430,6 @@ static struct pci_dev * pci_get_related_ } /* FIXME - stolen from msr.c - the calls in msr.c could be exported */ -#ifdef CONFIG_SMP - struct msr_command { int cpu; int err; @@ -444,90 +439,38 @@ struct msr_command { static void smp_wrmsr(void *cmd_block) { - struct msr_command *cmd = (struct msr_command *) cmd_block; - unsigned int cpu; - - cpu = smp_processor_id(); - debugf1("%s(): %d ? %d\n", __func__, cmd->cpu, cpu); - - if (cmd->cpu != cpu) - return; - - debugf1("%s(): Matched %d\n", __func__, cmd->cpu); + struct msr_command *cmd = cmd_block; wrmsr(cmd->reg, cmd->data[0], cmd->data[1]); } static void smp_rdmsr(void *cmd_block) { - struct msr_command *cmd = (struct msr_command *) cmd_block; - unsigned int cpu; - - cpu = smp_processor_id(); - debugf1("%s(): %d ? %d\n", __func__, cmd->cpu, cpu); - - if (cmd->cpu != cpu) - return; - - debugf1("%s(): Matched %d\n", __func__, cmd->cpu); + struct msr_command *cmd = cmd_block; rdmsr(cmd->reg, cmd->data[0], cmd->data[1]); } -static inline void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) +static void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) { struct msr_command cmd; - preempt_disable(); - debugf0("%s(): %d\n", __func__, cpu); - - if (cpu == smp_processor_id()) - wrmsr(reg, eax, edx); - else { - cmd.cpu = cpu; - cmd.reg = reg; - cmd.data[0] = eax; - cmd.data[1] = edx; - smp_call_function(smp_wrmsr, &cmd, 1, 1); - } - - preempt_enable(); + cmd.cpu = raw_smp_processor_id(); + cmd.reg = reg; + cmd.data[0] = eax; + cmd.data[1] = edx; + on_each_cpu(smp_wrmsr, &cmd, 1, 1); } -static inline void do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) +static void do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) { struct msr_command cmd; - preempt_disable(); - debugf0("%s(): %d\n", __func__, cpu); - - if (cpu == smp_processor_id()) - rdmsr(reg, eax, edx); - else { - cmd.cpu = cpu; - cmd.reg = reg; - smp_call_function(smp_rdmsr, &cmd, 1, 1); - *eax = cmd.data[0]; - *edx = cmd.data[1]; - } - - preempt_enable(); -} - -#else /* ! CONFIG_SMP */ - -static inline void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) -{ - debugf0("%s()\n", __func__); - wrmsr(reg, eax, edx); -} - -static inline void do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) -{ - debugf0("%s()\n", __func__); - rdmsr(reg, eax, edx); + cmd.cpu = raw_smp_processor_id(); + cmd.reg = reg; + on_each_cpu(smp_rdmsr, &cmd, 1, 1); + *eax = cmd.data[0]; + *edx = cmd.data[1]; } -#endif /* ! CONFIG_SMP */ - /* * FIXME - This is a large chunk of memory to suck up just to decode the * syndrome. It would be nice to discover a pattern in the syndromes that @@ -730,7 +673,7 @@ static inline u64 base_from_dcsb(u32 dcs return ((u64) (dcsb & 0xffe0fe00)) << 4; } -static inline u64 mask_from_dcsm(u32 dcsm) +static u64 mask_from_dcsm(u32 dcsm) { u64 dcsm_bits, other_bits; @@ -754,7 +697,7 @@ static inline u64 mask_from_dcsm(u32 dcs * section 3.4.4 (p. 70). They are the lowest and highest physical addresses * in the address range they represent. */ -static inline void get_base_and_limit(struct k8_pvt *pvt, int node_id, +static void get_base_and_limit(struct k8_pvt *pvt, int node_id, u64 *base, u64 *limit) { *base = ((u64) (pvt->dbr[node_id] & 0xffff0000)) << 8; @@ -790,7 +733,7 @@ static int base_limit_match(struct k8_pv * return NULL. */ static struct mem_ctl_info * find_mc_by_sys_addr(struct mem_ctl_info *mci, - u64 sys_addr) + u64 sys_addr) { struct k8_pvt *pvt; int node_id; @@ -800,7 +743,7 @@ static struct mem_ctl_info * find_mc_by_ * 3.4.4.2) registers to map the SysAddr to a node ID. */ - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; /* The value of this field should be the same for all DRAM Base * registers. Therefore we arbitrarily choose to read it from the @@ -876,7 +819,7 @@ static inline u64 get_dram_base(struct m { struct k8_pvt *pvt; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; return ((u64) (pvt->dbr[pvt->node_id] & 0xffff0000)) << 8; } @@ -900,7 +843,7 @@ static int get_dram_hole_info(struct mem struct k8_pvt *pvt; u64 base; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; if (node_rev(pvt->node_id) < OPTERON_CPU_REV_E) { debugf2("revision %d for node %d does not support DHAR\n", @@ -1017,7 +960,7 @@ static u64 sys_addr_to_dram_addr(struct * register (section 3.4.4.1). Return the number of bits from a SysAddr that * are used for node interleaving. */ -static inline int num_node_interleave_bits(unsigned intlv_en) +static int num_node_interleave_bits(unsigned intlv_en) { static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 }; int n; @@ -1037,7 +980,7 @@ static u64 dram_addr_to_input_addr(struc int intlv_shift; u64 input_addr; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; /* Near the start of section 3.4.4 (p. 70), the k8 documentation gives * instructions for translating a DramAddr to an InputAddr. Here we @@ -1078,7 +1021,7 @@ static int input_addr_to_csrow(struct me u32 dcsb, dcsm; u64 base, mask; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; /* Here we use the DRAM CS Base (section 3.5.4) and DRAM CS Mask * (section 3.5.5) registers. For each CS base/mask register pair, @@ -1130,7 +1073,7 @@ static u64 input_addr_to_dram_addr(struc * (section 3.4.4.2) for the node that input_addr is associated with. */ - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; node_id = pvt->node_id; BUG_ON((node_id < 0) || (node_id > 7)); intlv_shift = num_node_interleave_bits((pvt->dbr[0] >> 8) & 0x07); @@ -1160,7 +1103,7 @@ static u64 dram_addr_to_sys_addr(struct struct k8_pvt *pvt; u64 hole_base, hole_offset, hole_size, base, limit, sys_addr; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; if (!get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size)) { if ((dram_addr >= hole_base) && @@ -1214,7 +1157,7 @@ static void find_csrow_limits(struct mem struct k8_pvt *pvt; u64 base, mask; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; BUG_ON((csrow < 0) || (csrow >= K8_NR_CSROWS)); base = base_from_dcsb(pvt->dcsb[csrow]); mask = mask_from_dcsm(pvt->dcsm[csrow]); @@ -1260,7 +1203,7 @@ static int k8_get_error_info_regs(struct { struct k8_pvt *pvt; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; pci_read_config_dword(pvt->misc_ctl, K8_NBSH, ®s->nbsh); if (!k8_error_info_valid(regs)) @@ -1278,7 +1221,7 @@ static void k8_get_error_info(struct mem struct k8_pvt *pvt; struct k8_error_info_regs regs; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; info->race_condition_detected = 0; if (k8_get_error_info_regs(mci, &info->error_info)) @@ -1395,7 +1338,7 @@ static void k8_handle_ce(struct mem_ctl_ struct mem_ctl_info *log_mci, *src_mci; log_mci = mci; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; if ((info->error_info.nbsh & BIT(26)) == 0) goto no_info; /* error address not valid */ @@ -1551,7 +1494,7 @@ static int k8_process_error_info(struct u32 err_code, ext_ec; int gart_tlb_error; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; /* check for an error */ if (!k8_error_info_valid(&info->error_info)) @@ -1624,7 +1567,7 @@ static int k8_get_devs(struct mem_ctl_in struct pci_dev *pdev; pdev = to_pci_dev(mci->dev); - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; /* The address mapping device provides a table that indicates which * physical address ranges are owned by which node. Each node's @@ -1669,7 +1612,7 @@ static void k8_get_mc_regs(struct mem_ct int i; pdev = to_pci_dev(mci->dev); - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; debugf1("k8 regs:\n"); for (i = 0; i < MAX_K8_NODES; i++) { @@ -1731,7 +1674,7 @@ static int k8_init_csrows(struct mem_ctl u64 input_addr_min, input_addr_max, sys_addr; u32 nbcfg; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &nbcfg); empty = 1; @@ -1784,7 +1727,7 @@ static void k8_enable_error_reporting(st struct k8_pvt *pvt; u32 mc4ctl_l=0, mc4ctl_h=0, mcgctl_l=0, mcgctl_h=0; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; pci_write_bits32(pvt->misc_ctl, K8_NBCTL, 0x3UL, 0x3UL); do_rdmsr(pvt->node_id, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h); mc4ctl_l |= BIT(0) | BIT(1); @@ -1814,7 +1757,7 @@ static int k8_probe1(struct pci_dev *pde return -ENOMEM; debugf0("%s(): mci = %p\n", __func__, mci); - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; pvt->dcl = dcl; mci->dev = &pdev->dev; pvt->node_id = MCI_TO_NODE_ID(mci); @@ -1900,7 +1843,7 @@ static void __devexit k8_remove_one(stru if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; - pvt = (struct k8_pvt *) mci->pvt_info; + pvt = mci->pvt_info; pci_dev_put(pvt->addr_map); pci_dev_put(pvt->misc_ctl); edac_mc_free(mci); _