Subject: cbe_cpufreq: reorganize code and fix some bugs From: Christian Krafft This patch reorganizes the code of the driver into three files. Two of them, cbe_cpufreq_pmi.c and cbe_cpufreq_pervasive.c, take care of the hardware; cbe_cpufreq.c contains the logic. Tuning the frequency using the PMI device has been commented out, as there is no hardware working yet. However, using PMI the board management controller is able to limit the frequency in situations such as a power failure. The PMI code is marked as experimental until it has been tested more thoroughly. Signed-off-by: Christian Krafft Signed-off-by: Arnd Bergmann Index: linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq.c =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/cbe_cpufreq.c +++ linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq.c @@ -1,7 +1,7 @@ /* * cpufreq driver for the cell processor * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 * * Author: Christian Krafft * @@ -21,24 +21,15 @@ */ #include -#include - -#include -#include #include -#include -#include -#include -#include #include - -#include "cbe_regs.h" +#include +#include "cbe_cpufreq.h" static DEFINE_MUTEX(cbe_switch_mutex); - -/* the CBE supports an 8 step frequency scaling */ -static struct cpufreq_frequency_table cbe_freqs[] = { +/* the CBE supports an 8 step frequency scaling */ +struct cpufreq_frequency_table cbe_freqs[] = { {1, 0}, {2, 0}, {3, 0}, @@ -50,156 +41,17 @@ static struct cpufreq_frequency_table cb {0, CPUFREQ_TABLE_END}, }; -/* to write to MIC register */ -static u64 MIC_Slow_Fast_Timer_table[] = { - [0 ... 
7] = 0x007fc00000000000ull, -}; - -/* more values for the MIC */ -static u64 MIC_Slow_Next_Timer_table[] = { - 0x0000240000000000ull, - 0x0000268000000000ull, - 0x000029C000000000ull, - 0x00002D0000000000ull, - 0x0000300000000000ull, - 0x0000334000000000ull, - 0x000039C000000000ull, - 0x00003FC000000000ull, -}; -static unsigned int pmi_frequency_limit = 0; -/* - * hardware specific functions - */ - -static struct of_device *pmi_dev; - -static int set_pmode_pmi(int cpu, unsigned int pmode) +static int set_pmode(int cpu, unsigned int slow_mode) { - int ret; - pmi_message_t pmi_msg; -#ifdef DEBUG - u64 time; -#endif - - pmi_msg.type = PMI_TYPE_FREQ_CHANGE; - pmi_msg.data1 = cbe_cpu_to_node(cpu); - pmi_msg.data2 = pmode; - -#ifdef DEBUG - time = (u64) get_cycles(); -#endif - - pmi_send_message(pmi_dev, pmi_msg); - ret = pmi_msg.data2; - - pr_debug("PMI returned slow mode %d\n", ret); - -#ifdef DEBUG - time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */ - time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */ - pr_debug("had to wait %lu ns for a transition\n", time); -#endif - return ret; -} - - -static int get_pmode(int cpu) -{ - int ret; - struct cbe_pmd_regs __iomem *pmd_regs; - - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - ret = in_be64(&pmd_regs->pmsr) & 0x07; - - return ret; -} - -static int set_pmode_reg(int cpu, unsigned int pmode) -{ - struct cbe_pmd_regs __iomem *pmd_regs; - struct cbe_mic_tm_regs __iomem *mic_tm_regs; - u64 flags; - u64 value; - - local_irq_save(flags); - - mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - - pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr); - pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0); - - out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); - - out_be64(&mic_tm_regs->slow_next_timer_0, 
MIC_Slow_Next_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); - - value = in_be64(&pmd_regs->pmcr); - /* set bits to zero */ - value &= 0xFFFFFFFFFFFFFFF8ull; - /* set bits to next pmode */ - value |= pmode; - - out_be64(&pmd_regs->pmcr, value); - - /* wait until new pmode appears in status register */ - value = in_be64(&pmd_regs->pmsr) & 0x07; - while(value != pmode) { - cpu_relax(); - value = in_be64(&pmd_regs->pmsr) & 0x07; - } - - local_irq_restore(flags); - - return 0; -} - -static int set_pmode(int cpu, unsigned int slow_mode) { - if (pmi_dev) - return set_pmode_pmi(cpu, slow_mode); +#ifdef CONFIG_CBE_CPUFREQ_PMI + if (cbe_cpufreq_pmi_dev) + return cbe_cpufreq_set_pmode_pmi(cpu, slow_mode); else - return set_pmode_reg(cpu, slow_mode); -} - -static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg) -{ - u8 cpu; - u8 cbe_pmode_new; - - BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); - - cpu = cbe_node_to_cpu(pmi_msg.data1); - cbe_pmode_new = pmi_msg.data2; - - pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency; - - pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit); -} - -static int pmi_notifier(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct cpufreq_policy *policy = data; - - if (event != CPUFREQ_INCOMPATIBLE) - return 0; - - cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit); - return 0; +#endif + return cbe_cpufreq_set_pmode(cpu, slow_mode); } -static struct notifier_block pmi_notifier_block = { - .notifier_call = pmi_notifier, -}; - -static struct pmi_handler cbe_pmi_handler = { - .type = PMI_TYPE_FREQ_CHANGE, - .handle_pmi_message = cbe_cpufreq_handle_pmi, -}; - - /* * cpufreq functions */ @@ -220,6 +72,8 @@ static int cbe_cpufreq_cpu_init(struct c max_freqp = of_get_property(cpu, "clock-frequency", NULL); + of_node_put(cpu); + if (!max_freqp) return -EINVAL; @@ -239,7 +93,7 @@ static int cbe_cpufreq_cpu_init(struct c /* if DEBUG 
is enabled set_pmode() measures the correct latency of a transition */ policy->cpuinfo.transition_latency = 25000; - cur_pmode = get_pmode(policy->cpu); + cur_pmode = cbe_cpufreq_get_pmode(policy->cpu); pr_debug("current pmode is at %d\n",cur_pmode); policy->cur = cbe_freqs[cur_pmode].frequency; @@ -248,13 +102,11 @@ static int cbe_cpufreq_cpu_init(struct c policy->cpus = cpu_sibling_map[policy->cpu]; #endif - cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); +#ifdef CONFIG_CBE_CPUFREQ_PMI + cbe_cpufreq_pmi_register(); +#endif - if (pmi_dev) { - /* frequency might get limited later, initialize limit with max_freq */ - pmi_frequency_limit = max_freq; - cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); - } + cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */ return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs); @@ -262,8 +114,9 @@ static int cbe_cpufreq_cpu_init(struct c static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - if (pmi_dev) - cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); +#ifdef CONFIG_CBE_CPUFREQ_PMI + cbe_cpufreq_pmi_unregister(); +#endif cpufreq_frequency_table_put_attr(policy->cpu); return 0; @@ -280,7 +133,7 @@ static int cbe_cpufreq_target(struct cpu { int rc; struct cpufreq_freqs freqs; - int cbe_pmode_new; + unsigned int cbe_pmode_new; cpufreq_frequency_table_target(policy, cbe_freqs, @@ -301,6 +154,7 @@ static int cbe_cpufreq_target(struct cpu cbe_freqs[cbe_pmode_new].index); rc = set_pmode(policy->cpu, cbe_pmode_new); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); mutex_unlock(&cbe_switch_mutex); @@ -323,26 +177,14 @@ static struct cpufreq_driver cbe_cpufreq static int __init cbe_cpufreq_init(void) { - struct device_node *np; - if (!machine_is(cell)) return -ENODEV; - np = of_find_node_by_type(NULL, "ibm,pmi"); - - pmi_dev = of_find_device_by_node(np); - - if 
(pmi_dev) - pmi_register_handler(pmi_dev, &cbe_pmi_handler); - return cpufreq_register_driver(&cbe_cpufreq_driver); } static void __exit cbe_cpufreq_exit(void) { - if (pmi_dev) - pmi_unregister_handler(pmi_dev, &cbe_pmi_handler); - cpufreq_unregister_driver(&cbe_cpufreq_driver); } Index: linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c @@ -0,0 +1,121 @@ +/* + * pervasive backend for the cbe_cpufreq driver + * + * This driver makes use of the pervasive unit to + * engage the desired frequency. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 + * + * Author: Christian Krafft + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +#ifdef DEBUG +#include +#endif + +#include "cbe_regs.h" +#include "cbe_cpufreq.h" + +/* to write to MIC register */ +static u64 MIC_Slow_Fast_Timer_table[] = { + [0 ... 
7] = 0x007fc00000000000ull, +}; + +/* more values for the MIC */ +static u64 MIC_Slow_Next_Timer_table[] = { + 0x0000240000000000ull, + 0x0000268000000000ull, + 0x000029C000000000ull, + 0x00002D0000000000ull, + 0x0000300000000000ull, + 0x0000334000000000ull, + 0x000039C000000000ull, + 0x00003FC000000000ull, +}; + + +int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + u64 flags; + u64 value; +#ifdef DEBUG + long time; +#endif + + local_irq_save(flags); + + mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr); + pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0); + +#ifdef DEBUG + time = jiffies; +#endif + + out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); + + out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); + + value = in_be64(&pmd_regs->pmcr); + /* set bits to zero */ + value &= 0xFFFFFFFFFFFFFFF8ull; + /* set bits to next pmode */ + value |= pmode; + + out_be64(&pmd_regs->pmcr, value); + +#ifdef DEBUG + /* wait until new pmode appears in status register */ + value = in_be64(&pmd_regs->pmsr) & 0x07; + while(value != pmode) { + cpu_relax(); + value = in_be64(&pmd_regs->pmsr) & 0x07; + } + + time = jiffies - time; + time = jiffies_to_usecs(time); + pr_debug("had to wait %lu us for a transition using regs\n", time); +#endif + + local_irq_restore(flags); + + return 0; +} + + +int cbe_cpufreq_get_pmode(int cpu) +{ + int ret; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + ret = in_be64(&pmd_regs->pmsr) & 0x07; + + return ret; +} + Index: linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c 
=================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -0,0 +1,128 @@ +/* + * pmi backend for the cbe_cpufreq driver + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Christian Krafft + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#include +#endif + +#include "cbe_regs.h" +#include "cbe_cpufreq.h" + +static unsigned int pmi_frequency_limit = 0; +struct of_device *cbe_cpufreq_pmi_dev; + + +/* + * hardware specific functions + */ + +int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode) +{ + int ret; + pmi_message_t pmi_msg; +#ifdef DEBUG + long time; +#endif + pmi_msg.type = PMI_TYPE_FREQ_CHANGE; + pmi_msg.data1 = cbe_cpu_to_node(cpu); + pmi_msg.data2 = pmode; + +#ifdef DEBUG + time = jiffies; +#endif + pmi_send_message(cbe_cpufreq_pmi_dev, pmi_msg); + +#ifdef DEBUG + time = jiffies - time; + time = jiffies_to_usecs(time); + pr_debug("had to wait %lu us for a transition using PMI\n", time); +#endif + ret = pmi_msg.data2; + pr_debug("PMI returned slow mode %d\n", ret); + return ret; +} +EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); + + +static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg) +{ + u8 cpu; 
+ u8 cbe_pmode_new; + + BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); + + cpu = cbe_node_to_cpu(pmi_msg.data1); + cbe_pmode_new = pmi_msg.data2; + + pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency; + + pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit); +} + +static int pmi_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct cpufreq_policy *policy = data; + + if (pmi_frequency_limit != 0) + cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit); + + return 0; +} + +static struct notifier_block pmi_notifier_block = { + .notifier_call = pmi_notifier, +}; + +static struct pmi_handler cbe_pmi_handler = { + .type = PMI_TYPE_FREQ_CHANGE, + .handle_pmi_message = cbe_cpufreq_handle_pmi, +}; + +void cbe_cpufreq_pmi_register(void) +{ + struct device_node *np; + + np = of_find_node_by_type(NULL, "ibm,pmi"); + cbe_cpufreq_pmi_dev = of_find_device_by_node(np); + + if (cbe_cpufreq_pmi_dev) { + pmi_register_handler(cbe_cpufreq_pmi_dev, &cbe_pmi_handler); + cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); + } +} + +void cbe_cpufreq_pmi_unregister(void) +{ + if (cbe_cpufreq_pmi_dev) { + cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); + pmi_unregister_handler(cbe_cpufreq_pmi_dev, &cbe_pmi_handler); + } +} + Index: linux-2.6/arch/powerpc/platforms/cell/Kconfig =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/Kconfig +++ linux-2.6/arch/powerpc/platforms/cell/Kconfig @@ -73,4 +73,14 @@ config CBE_CPUFREQ For details, take a look at . If you don't have such processor, say N +config CBE_CPUFREQ_PMI + bool "CBE frequency scaling using PMI interface" + depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL + default n + help + Select this, if you want to use the PMI interface + to switch frequencies. Using PMI, the + processor will not only be able to run at lower speed, + but also at lower core voltage. 
+ endmenu Index: linux-2.6/arch/powerpc/platforms/cell/Makefile =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/Makefile +++ linux-2.6/arch/powerpc/platforms/cell/Makefile @@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interr obj-$(CONFIG_CBE_RAS) += ras.o obj-$(CONFIG_CBE_THERM) += cbe_thermal.o -obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o +obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o +cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o +cbe-cpufreq-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o Index: linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq.h =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/platforms/cell/cbe_cpufreq.h @@ -0,0 +1,24 @@ +/* + * cbe_cpufreq.h + * + * This file contains the definitions used by the cbe_cpufreq driver. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 + * + * Author: Christian Krafft + * + */ +#include + +extern struct cpufreq_frequency_table cbe_freqs[]; + +int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode); +int cbe_cpufreq_get_pmode(int cpu); + +void cbe_cpufreq_pmi_register(void); +void cbe_cpufreq_pmi_unregister(void); + +extern int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); + +extern struct of_device *cbe_cpufreq_pmi_dev; +