GIT b1e6dfdc02f1895565513f2892bd34ca261f9e06 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6.git#test commit 2730c9295a9a797a22b800d3befd6a64fdc56b02 Author: Keith Owens Date: Wed Feb 8 13:41:10 2006 +1100 [IA64] MCA: remove obsolete ifdef No platform in the community tree uses PLATFORM_MCA_HANDLERS, remove the references. Signed-off-by: Keith Owens Signed-off-by: Tony Luck commit e9ac054daaecf8a11f2113b60f2b6ce381c4f131 Author: Keith Owens Date: Wed Feb 8 13:41:04 2006 +1100 [IA64] MCA: update MCA comm field for user space tasks Update the comm field on the MCA handler for user tasks as well as for verified kernel tasks. This helps to identify the task that was running when the MCA occurred. Signed-off-by: Keith Owens Signed-off-by: Tony Luck commit 9336b0836bf789136b51caf9ddd49dcbf1726cf4 Author: Keith Owens Date: Wed Feb 8 13:40:59 2006 +1100 [IA64] MCA: print messages in MCA handler Print a message identifying the monarch MCA handler. Print a summary of the status of the slave MCA cpus. Signed-off-by: Keith Owens Signed-off-by: Tony Luck commit dcc1dd2366a7c355fd8b6543c52685b864a2044f Author: Jack Steiner Date: Tue Feb 7 09:24:14 2006 -0800 [IA64-SGI] - Eliminate SN pio_phys_xxx macros. Move to assembly Rewrite the SN pio_phys_xxx macros in assembly language. This avoids issues with the Intel icc compiler. Function call overhead is not an issue - the functions reference PIOs and take 100's nsec to complete. In addition, the functions should likely be in assembly language anyway - they reference memory using physical addressing mode. One function executes with psr.ic disabled. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck commit 412e6a378260608bf28f29d4fa8a9241e0240a2d Author: Chen, Kenneth W Date: Thu Jan 26 17:19:46 2006 -0800 [IA64] use icc defined constant Use icc defined constant instead of magic number. Signed-off-by: Ken Chen Signed-off-by: Tony Luck commit 9df79decc395b2f9484ff93a1383ba705ff34b10 Author: Chen, Kenneth W Date: Thu Jan 26 17:12:02 2006 -0800 [IA64] add __builtin_trap definition for icc build Map __builtin_trap function to break 0 instruction. Signed-off-by: HJ Lu Signed-off-by: Ken Chen Signed-off-by: Tony Luck commit c583f66dc41cfa4055b6ac8f50cc1ebf362298f7 Author: Chen, Kenneth W Date: Thu Jan 26 17:08:47 2006 -0800 [IA64] clean up asm/intel_intrin.h Include intrinsic header file from icc compiler. Remove duplicate definition from kernel source. Signed-off-by: HJ Lu Signed-off-by: Ken Chen Signed-off-by: Tony Luck commit 4bf64e72bd499d2bf3509c2dc60d09c39f72c782 Author: Chen, Kenneth W Date: Thu Jan 26 16:58:52 2006 -0800 [IA64] map ia64_hint definition to intel compiler intrinsic Map ia64_hint() to internal intel compiler intrinsic. Signed-off-by: Ken Chen Signed-off-by: Tony Luck commit e08e6c521355cd33e647b2f739885bc3050eead6 Author: Brent Casavant Date: Thu Jan 26 15:55:52 2006 -0800 [IA64] hooks to wait for mmio writes to drain when migrating processes On SN2, MMIO writes which are issued from separate processors are not guaranteed to arrive in any particular order at the IO hardware. When performing such writes from the kernel this is not a problem, as a kernel thread will not migrate to another CPU during execution, and mmiowb() calls can guarantee write ordering when control of the IO resource is allowed to move between threads. However, when MMIO writes can be performed from user space (e.g. DRM) there are no such guarantees and mechanisms, as the process may context-switch at any time, and may migrate to a different CPU as part of the switch. For such programs/hardware to operate correctly, it is required that the MMIO writes from the old CPU be accepted by the IO hardware before subsequent writes from the new CPU can be issued. The following patch implements this behavior on SN2 by waiting for a Shub register to indicate that these writes have been accepted. This is placed in the context switch-in path, and only performs the wait when the newly scheduled task changes CPUs. Signed-off-by: Prarit Bhargava Signed-off-by: Brent Casavant commit 13938ca7a1ad9a4788cf73309f187d99c97ddfde Author: Mark Maule Date: Thu Jan 26 14:46:39 2006 -0600 [IA64-SGI] driver bugfixes and hardware workarounds for CE1.0 asic Various bugfixes and hardware bug workarounds necessary for the rev 1.0 version of the altix TIO CE asic. Signed-off-by: Mark Maule Signed-off-by: Tony Luck commit 28ff6b9b2fc01d2c2746c72ce8af1729344fae27 Author: Aaron Young Date: Mon Jan 23 09:00:51 2006 -0800 [IA64-SGI] Handle SC env. powerdown events Handle system controller power down pending events on SN systems. This allows the system to gracefully shutdown before the system controller removes power due to an adverse environmental condition. Signed-off-by: Aaron Young Signed-off-by: Tony Luck commit b0a06623dc4caf6dfb6a84419507643471676d20 Author: Keith Owens Date: Sun Jan 22 10:55:25 2006 +1100 [IA64] Delete MCA/INIT sigdelayed code The only user of the MCA/INIT sigdelayed code (SGI's I/O probing) has moved from the kernel into SAL. Delete the MCA/INIT sigdelayed code. Signed-off-by: Keith Owens Signed-off-by: Tony Luck commit 6e586f32931d6c98431d54cd0430d4366195b0ba Author: Jes Sorensen Date: Tue Jan 17 12:24:39 2006 -0500 [IA64-SGI] sem2mutex ioc4.c Convert to use a single mutex instead of two rwsems as this isn't performance critical. Signed-off-by: Jes Sorensen Signed-off-by: Brent Casavant Signed-off-by: Tony Luck commit a454c2f3d1fd1cab7073b53c6c14d6d4b61f4e09 Author: Chen, Kenneth W Date: Wed Jan 11 17:11:09 2006 -0800 [IA64] implement ia64 specific mutex primitives Implement ia64 optimized mutex primitives. It properly uses acquire/release memory ordering semantics in lock/unlock path. 2nd version making them all static inline functions. Signed-off-by: Ken Chen Signed-off-by: Tony Luck commit b88e926584bf100bc23f5e76b7b674d4257edcb0 Author: Ashok Raj Date: Thu Jan 19 16:18:47 2006 -0800 [IA64] Fix UP build with BSP removal support. Causes undefined force_cpei_retarget defined in arch/ia64/kernel/smpboot.c Push the unneeded code inside #ifdef CONFIG_HOTPLUG_CPU. Signed-off-by: Ashok Raj Signed-off-by: Tony Luck commit ff741906ad3cf4b8ca1a958acb013a97a6381ca2 Author: Ashok Raj Date: Fri Nov 11 14:32:40 2005 -0800 [IA64] support for cpu0 removal here is the BSP removal support for IA64. Its pretty much the same thing that was released a while back, but has your feedback incorporated. - Removed CONFIG_BSP_REMOVE_WORKAROUND and associated cmdline param - Fixed compile issue with sn2/zx1 due to a undefined fix_b0_for_bsp - some formatting nits (whitespace etc) This has been tested on tiger and long back by alex on hp systems as well. Signed-off-by: Ashok Raj Signed-off-by: Tony Luck --- diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 845cd09..df2ffd2 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -271,6 +271,25 @@ config SCHED_SMT Intel IA64 chips with MultiThreading at a cost of slightly increased overhead in some places. If unsure say N here. +config PERMIT_BSP_REMOVE + bool "Support removal of Bootstrap Processor" + depends on HOTPLUG_CPU + default n + ---help--- + Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU + support. + +config FORCE_CPEI_RETARGET + bool "Force assumption that CPEI can be re-targetted" + depends on PERMIT_BSP_REMOVE + default n + ---help--- + Say Y if you need to force the assumption that CPEI can be re-targetted to + any cpu in the system. This hint is available via ACPI 3.0 specifications. + Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. + This option it useful to enable this feature on older BIOS's as well. + You can also enable this by using boot command line option force_cpei=1. + config PREEMPT bool "Preemptible Kernel" help diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 6859119..65d6b2e 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -114,6 +114,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17 CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y +CONFIG_PERMIT_BSP_REMOVE=y +CONFIG_FORCE_CPEI_RETARGET=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_SELECT_MEMORY_MODEL=y diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index ecd44bd..4722ec5 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry return 0; } +#ifdef CONFIG_HOTPLUG_CPU unsigned int can_cpei_retarget(void) { extern int cpe_vector; + extern unsigned int force_cpei_retarget; /* * Only if CPEI is supported and the override flag * is present, otherwise return that its re-targettable * if we are in polling mode. */ - if (cpe_vector > 0 && !acpi_cpei_override) - return 0; - else - return 1; + if (cpe_vector > 0) { + if (acpi_cpei_override || force_cpei_retarget) + return 1; + else + return 0; + } + return 1; } unsigned int is_cpu_cpei_target(unsigned int cpu) @@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cp { acpi_cpei_phys_cpuid = cpu_physical_id(cpu); } +#endif unsigned int get_cpei_target_cpu(void) { diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 930fdfc..0e3eda9 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1102,9 +1102,6 @@ skip_rbs_switch: st8 [r2]=r8 st8 [r3]=r10 .work_pending: - tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? -(p6) br.cond.sptk.few .sigdelayed - ;; tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? (p6) br.cond.sptk.few .notify #ifdef CONFIG_PREEMPT @@ -1131,17 +1128,6 @@ skip_rbs_switch: (pLvSys)br.cond.sptk.few .work_pending_syscall_end br.cond.sptk.many .work_processed_kernel // don't re-check -// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where -// it could not be delivered. Deliver it now. The signal might be for us and -// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed -// signal. - -.sigdelayed: - br.call.sptk.many rp=do_sigdelayed - cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // re-check - .work_pending_syscall_end: adds r2=PT(R8)+16,r12 adds r3=PT(R10)+16,r12 diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 574084f..8832c55 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int ve { #ifdef CONFIG_SMP static int cpu = -1; + extern int cpe_vector; /* * In case of vector shared by multiple RTEs, all RTEs that @@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int ve if (!cpu_online(smp_processor_id())) return cpu_physical_id(smp_processor_id()); +#ifdef CONFIG_ACPI + if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) + return get_cpei_target_cpu(); +#endif + #ifdef CONFIG_NUMA { int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index d33244c..5ce908e 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -163,8 +163,19 @@ void fixup_irqs(void) { unsigned int irq; extern void ia64_process_pending_intr(void); + extern void ia64_disable_timer(void); + extern volatile int time_keeper_id; + + ia64_disable_timer(); + + /* + * Find a new timesync master + */ + if (smp_processor_id() == time_keeper_id) { + time_keeper_id = first_cpu(cpu_online_map); + printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); + } - ia64_set_itv(1<<16); /* * Phase 1: Locate irq's bound to this cpu and * relocate them for cpu removal. diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index ee7eec9..b57e723 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -281,14 +281,10 @@ ia64_mca_log_sal_error_record(int sal_in ia64_sal_clear_state_info(sal_info_type); } -/* - * platform dependent error handling - */ -#ifndef PLATFORM_MCA_HANDLERS - #ifdef CONFIG_ACPI int cpe_vector = -1; +int ia64_cpe_irq = -1; static irqreturn_t ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) @@ -377,8 +373,6 @@ ia64_mca_register_cpev (int cpev) } #endif /* CONFIG_ACPI */ -#endif /* PLATFORM_MCA_HANDLERS */ - /* * ia64_mca_cmc_vector_setup * @@ -630,6 +624,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *t *tnat |= (nat << tslot); } +/* Change the comm field on the MCA/INT task to include the pid that + * was interrupted, it makes for easier debugging. If that pid was 0 + * (swapper or nested MCA/INIT) then use the start of the previous comm + * field suffixed with its cpu. + */ + +static void +ia64_mca_modify_comm(const task_t *previous_current) +{ + char *p, comm[sizeof(current->comm)]; + if (previous_current->pid) + snprintf(comm, sizeof(comm), "%s %d", + current->comm, previous_current->pid); + else { + int l; + if ((p = strchr(previous_current->comm, ' '))) + l = p - previous_current->comm; + else + l = strlen(previous_current->comm); + snprintf(comm, sizeof(comm), "%s %*s %d", + current->comm, l, previous_current->comm, + task_thread_info(previous_current)->cpu); + } + memcpy(current->comm, comm, sizeof(current->comm)); +} + /* On entry to this routine, we are running on the per cpu stack, see * mca_asm.h. The original stack has not been touched by this event. Some of * the original stack's registers will be in the RBS on this stack. This stack @@ -648,7 +668,7 @@ ia64_mca_modify_original_stack(struct pt struct ia64_sal_os_state *sos, const char *type) { - char *p, comm[sizeof(current->comm)]; + char *p; ia64_va va; extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ const pal_min_state_area_t *ms = sos->pal_min_state; @@ -721,6 +741,10 @@ ia64_mca_modify_original_stack(struct pt /* Verify the previous stack state before we change it */ if (user_mode(regs)) { msg = "occurred in user space"; + /* previous_current is guaranteed to be valid when the task was + * in user space, so ... + */ + ia64_mca_modify_comm(previous_current); goto no_mod; } if (r13 != sos->prev_IA64_KR_CURRENT) { @@ -750,25 +774,7 @@ ia64_mca_modify_original_stack(struct pt goto no_mod; } - /* Change the comm field on the MCA/INT task to include the pid that - * was interrupted, it makes for easier debugging. If that pid was 0 - * (swapper or nested MCA/INIT) then use the start of the previous comm - * field suffixed with its cpu. - */ - if (previous_current->pid) - snprintf(comm, sizeof(comm), "%s %d", - current->comm, previous_current->pid); - else { - int l; - if ((p = strchr(previous_current->comm, ' '))) - l = p - previous_current->comm; - else - l = strlen(previous_current->comm); - snprintf(comm, sizeof(comm), "%s %*s %d", - current->comm, l, previous_current->comm, - task_thread_info(previous_current)->cpu); - } - memcpy(current->comm, comm, sizeof(current->comm)); + ia64_mca_modify_comm(previous_current); /* Make the original task look blocked. First stack a struct pt_regs, * describing the state at the time of interrupt. mca_asm.S built a @@ -908,7 +914,7 @@ no_mod: static void ia64_wait_for_slaves(int monarch) { - int c, wait = 0; + int c, wait = 0, missing = 0; for_each_online_cpu(c) { if (c == monarch) continue; @@ -919,15 +925,32 @@ ia64_wait_for_slaves(int monarch) } } if (!wait) - return; + goto all_in; for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + missing = 1; break; } } + if (!missing) + goto all_in; + printk(KERN_INFO "OS MCA slave did not rendezvous on cpu"); + for_each_online_cpu(c) { + if (c == monarch) + continue; + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + printk(" %d", c); + } + printk("\n"); + return; + +all_in: + printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n"); + return; } /* @@ -953,6 +976,10 @@ ia64_mca_handler(struct pt_regs *regs, s task_t *previous_current; oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ + console_loglevel = 15; /* make sure printks make it to console */ + printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", + sos->proc_state_param, cpu, sos->monarch); + previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) @@ -1444,11 +1471,13 @@ void __devinit ia64_mca_cpu_init(void *cpu_data) { void *pal_vaddr; + static int first_time = 1; - if (smp_processor_id() == 0) { + if (first_time) { void *mca_data; int cpu; + first_time = 0; mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS + KERNEL_STACK_SIZE); mca_data = (void *)(((unsigned long)mca_data + @@ -1704,6 +1733,7 @@ ia64_mca_late_init(void) desc = irq_descp(irq); desc->status |= IRQ_PER_CPU; setup_irq(irq, &mca_cpe_irqaction); + ia64_cpe_irq = irq; } ia64_mca_register_cpev(cpe_vector); IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9c5194b..077f212 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6722,6 +6722,7 @@ __initcall(pfm_init); void pfm_init_percpu (void) { + static int first_time=1; /* * make sure no measurement is active * (may inherit programmed PMCs from EFI). @@ -6734,8 +6735,10 @@ pfm_init_percpu (void) */ pfm_unfreeze_pmu(); - if (smp_processor_id() == 0) + if (first_time) { register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + first_time=0; + } ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); ia64_srlz_d(); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 463f6bb..1d7903e 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct } return 0; } - -/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it - * could not be delivered. It is important that the target process is not - * allowed to do any more work in user space. Possible cases for the target - * process: - * - * - It is sleeping and will wake up soon. Store the data in the current task, - * the signal will be sent when the current task returns from the next - * interrupt. - * - * - It is running in user context. Store the data in the current task, the - * signal will be sent when the current task returns from the next interrupt. - * - * - It is running in kernel context on this or another cpu and will return to - * user context. Store the data in the target task, the signal will be sent - * to itself when the target task returns to user space. - * - * - It is running in kernel context on this cpu and will sleep before - * returning to user context. Because this is also the current task, the - * signal will not get delivered and the task could sleep indefinitely. - * Store the data in the idle task for this cpu, the signal will be sent - * after the idle task processes its next interrupt. - * - * To cover all cases, store the data in the target task, the current task and - * the idle task on this cpu. Whatever happens, the signal will be delivered - * to the target task before it can do any useful user space work. Multiple - * deliveries have no unwanted side effects. - * - * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts - * disabled. It must not take any locks nor use kernel structures or services - * that require locks. - */ - -/* To ensure that we get the right pid, check its start time. To avoid extra - * include files in thread_info.h, convert the task start_time to unsigned long, - * giving us a cycle time of > 580 years. - */ -static inline unsigned long -start_time_ul(const struct task_struct *t) -{ - return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; -} - -void -set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) -{ - struct task_struct *t; - unsigned long start_time = 0; - int i; - - for (i = 1; i <= 3; ++i) { - switch (i) { - case 1: - t = find_task_by_pid(pid); - if (t) - start_time = start_time_ul(t); - break; - case 2: - t = current; - break; - default: - t = idle_task(smp_processor_id()); - break; - } - - if (!t) - return; - task_thread_info(t)->sigdelayed.signo = signo; - task_thread_info(t)->sigdelayed.code = code; - task_thread_info(t)->sigdelayed.addr = addr; - task_thread_info(t)->sigdelayed.start_time = start_time; - task_thread_info(t)->sigdelayed.pid = pid; - wmb(); - set_tsk_thread_flag(t, TIF_SIGDELAYED); - } -} - -/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that - * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. - */ - -void -do_sigdelayed(void) -{ - struct siginfo siginfo; - pid_t pid; - struct task_struct *t; - - clear_thread_flag(TIF_SIGDELAYED); - memset(&siginfo, 0, sizeof(siginfo)); - siginfo.si_signo = current_thread_info()->sigdelayed.signo; - siginfo.si_code = current_thread_info()->sigdelayed.code; - siginfo.si_addr = current_thread_info()->sigdelayed.addr; - pid = current_thread_info()->sigdelayed.pid; - t = find_task_by_pid(pid); - if (!t) - return; - if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) - return; - force_sig_info(siginfo.si_signo, &siginfo, t); -} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index b681ef3..c4b633b 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -70,6 +70,12 @@ #endif #ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_PERMIT_BSP_REMOVE +#define bsp_remove_ok 1 +#else +#define bsp_remove_ok 0 +#endif + /* * Store all idle threads, this can be reused instead of creating * a new thread. Also avoids complicated thread destroy functionality @@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_boo /* * ITC synchronization related stuff: */ -#define MASTER 0 +#define MASTER (0) #define SLAVE (SMP_CACHE_BYTES/8) #define NUM_ROUNDS 64 /* magic value */ @@ -151,6 +157,27 @@ char __initdata no_int_routing; unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ +#ifdef CONFIG_FORCE_CPEI_RETARGET +#define CPEI_OVERRIDE_DEFAULT (1) +#else +#define CPEI_OVERRIDE_DEFAULT (0) +#endif + +unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; + +static int __init +cmdl_force_cpei(char *str) +{ + int value=0; + + get_option (&str, &value); + force_cpei_retarget = value; + + return 1; +} + +__setup("force_cpei=", cmdl_force_cpei); + static int __init nointroute (char *str) { @@ -161,6 +188,27 @@ nointroute (char *str) __setup("nointroute", nointroute); +static void fix_b0_for_bsp(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpuid; + static int fix_bsp_b0 = 1; + + cpuid = smp_processor_id(); + + /* + * Cache the b0 value on the first AP that comes up + */ + if (!(fix_bsp_b0 && cpuid)) + return; + + sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; + printk ("Fixed BSP b0 value from CPU %d\n", cpuid); + + fix_bsp_b0 = 0; +#endif +} + void sync_master (void *arg) { @@ -327,8 +375,9 @@ smp_setup_percpu_timer (void) static void __devinit smp_callin (void) { - int cpuid, phys_id; + int cpuid, phys_id, itc_master; extern void ia64_init_itm(void); + extern volatile int time_keeper_id; #ifdef CONFIG_PERFMON extern void pfm_init_percpu(void); @@ -336,6 +385,7 @@ smp_callin (void) cpuid = smp_processor_id(); phys_id = hard_smp_processor_id(); + itc_master = time_keeper_id; if (cpu_online(cpuid)) { printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", @@ -343,6 +393,8 @@ smp_callin (void) BUG(); } + fix_b0_for_bsp(); + lock_ipi_calllock(); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); @@ -365,8 +417,8 @@ smp_callin (void) * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls * local_bh_enable(), which bugs out if irqs are not enabled... */ - Dprintk("Going to syncup ITC with BP.\n"); - ia64_sync_itc(0); + Dprintk("Going to syncup ITC with ITC Master.\n"); + ia64_sync_itc(itc_master); } /* @@ -635,6 +687,47 @@ remove_siblinginfo(int cpu) } extern void fixup_irqs(void); + +int migrate_platform_irqs(unsigned int cpu) +{ + int new_cpei_cpu; + irq_desc_t *desc = NULL; + cpumask_t mask; + int retval = 0; + + /* + * dont permit CPEI target to removed. + */ + if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { + printk ("CPU (%d) is CPEI Target\n", cpu); + if (can_cpei_retarget()) { + /* + * Now re-target the CPEI to a different processor + */ + new_cpei_cpu = any_online_cpu(cpu_online_map); + mask = cpumask_of_cpu(new_cpei_cpu); + set_cpei_target_cpu(new_cpei_cpu); + desc = irq_descp(ia64_cpe_irq); + /* + * Switch for now, immediatly, we need to do fake intr + * as other interrupts, but need to study CPEI behaviour with + * polling before making changes. + */ + if (desc) { + desc->handler->disable(ia64_cpe_irq); + desc->handler->set_affinity(ia64_cpe_irq, mask); + desc->handler->enable(ia64_cpe_irq); + printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu); + } + } + if (!desc) { + printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); + retval = -EBUSY; + } + } + return retval; +} + /* must be called with cpucontrol mutex held */ int __cpu_disable(void) { @@ -643,8 +736,17 @@ int __cpu_disable(void) /* * dont permit boot processor for now */ - if (cpu == 0) - return -EBUSY; + if (cpu == 0 && !bsp_remove_ok) { + printk ("Your platform does not support removal of BSP\n"); + return (-EBUSY); + } + + cpu_clear(cpu, cpu_online_map); + + if (migrate_platform_irqs(cpu)) { + cpu_set(cpu, cpu_online_map); + return (-EBUSY); + } remove_siblinginfo(cpu); cpu_clear(cpu, cpu_online_map); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 307d01e..ac16743 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -32,7 +32,7 @@ extern unsigned long wall_jiffies; -#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ +volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ @@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, new_itm += local_cpu_data->itm_delta; - if (smp_processor_id() == TIME_KEEPER_ID) { + if (smp_processor_id() == time_keeper_id) { /* * Here we are in the timer irq handler. We have irqs locally * disabled, but we don't know if the timer_bh is running on @@ -236,6 +236,11 @@ static struct irqaction timer_irqaction .name = "timer" }; +void __devinit ia64_disable_timer(void) +{ + ia64_set_itv(1 << 16); +} + void __init time_init (void) { diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 6e5eea1..3b6fd79 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,7 +36,7 @@ int arch_register_cpu(int num) parent = &sysfs_nodes[cpu_to_node(num)]; #endif /* CONFIG_NUMA */ -#ifdef CONFIG_ACPI +#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* * If CPEI cannot be re-targetted, and this is * CPEI target, then dont create the control file diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index acaaec4..9855ba3 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -181,13 +181,15 @@ per_cpu_init (void) { void *cpu_data; int cpu; + static int first_time=1; /* * get_free_pages() cannot be used before cpu_init() done. BSP * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls * get_zeroed_page(). */ - if (smp_processor_id() == 0) { + if (first_time) { + first_time=0; cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); for (cpu = 0; cpu < NR_CPUS; cpu++) { diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c87d6d1..573d5cc 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -528,12 +528,17 @@ void __init find_memory(void) void *per_cpu_init(void) { int cpu; + static int first_time = 1; + if (smp_processor_id() != 0) return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; - for (cpu = 0; cpu < NR_CPUS; cpu++) - per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + if (first_time) { + first_time = 0; + for (cpu = 0; cpu < NR_CPUS; cpu++) + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + } return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 3e9b4ee..ab9c48c 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile @@ -10,7 +10,8 @@ CPPFLAGS += -I$(srctree)/arch/ia64/sn/include obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ - huberror.o io_init.o iomv.o klconflib.o sn2/ + huberror.o io_init.o iomv.o klconflib.o pio_phys.o \ + sn2/ obj-$(CONFIG_IA64_GENERIC) += machvec.o obj-$(CONFIG_SGI_TIOCX) += tiocx.o obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S new file mode 100644 index 0000000..3c7d48d --- /dev/null +++ b/arch/ia64/sn/kernel/pio_phys.S @@ -0,0 +1,71 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + * + * This file contains macros used to access MMR registers via + * uncached physical addresses. + * pio_phys_read_mmr - read an MMR + * pio_phys_write_mmr - write an MMR + * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 + * Second MMR will be skipped if address is NULL + * + * Addresses passed to these routines should be uncached physical addresses + * ie., 0x80000.... + */ + + + +#include +#include + +GLOBAL_ENTRY(pio_phys_read_mmr) + .prologue + .regstk 1,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + ld8.acq r8=[r32] + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_read_mmr) + +GLOBAL_ENTRY(pio_phys_write_mmr) + .prologue + .regstk 2,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + st8.rel [r32]=r33 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_write_mmr) + +GLOBAL_ENTRY(pio_atomic_phys_write_mmrs) + .prologue + .regstk 4,0,0,0 + .body + mov r2=psr + cmp.ne p9,p0=r34,r0; + rsm psr.i | psr.dt | psr.ic + ;; + srlz.d + st8.rel [r32]=r33 +(p9) st8.rel [r34]=r35 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_atomic_phys_write_mmrs) + + diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 5b84836..8b6d5c8 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. */ #include @@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p) * for sn. */ pm_power_off = ia64_sn_power_down; + current->thread.flags |= IA64_THREAD_MIGRATION; } /** @@ -660,7 +661,8 @@ void __init sn_cpu_init(void) SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; u64 *pio; pio = is_shub1() ? pio1 : pio2; - pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); + pda->pio_write_status_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; } diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 24eefb2..fbcfc48 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(v return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; } +/** + * sn_migrate - SN-specific task migration actions + * @task: Task being migrated to new CPU + * + * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. + * Context switching user threads which have memory-mapped MMIO may cause + * PIOs to issue from seperate CPUs, thus the PIO writes must be drained + * from the previous CPU's Shub before execution resumes on the new CPU. + */ +void sn_migrate(struct task_struct *task) +{ + pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); + volatile unsigned long *adr = last_pda->pio_write_status_addr; + unsigned long val = last_pda->pio_write_status_val; + + /* Drain PIO writes from old CPU's Shub */ + while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) + != val)) + cpu_relax(); +} + void sn_tlb_migrate_finish(struct mm_struct *mm) { /* flush_tlb_mm is inefficient if more than 1 users of mm */ diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index e52831e..fa073cc 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -15,6 +15,124 @@ #include #include #include +#include + +/* + * 1/26/2006 + * + * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe + * (taken from the above PV) before and after accessing tioce internal MMR's + * to avoid tioce lockups. + * + * The recipe as taken from the PV: + * + * if(mmr address < 0x45000) { + * if(mmr address == 0 or 0x80) + * mmr wrt or read address 0xc0 + * else if(mmr address == 0x148 or 0x200) + * mmr wrt or read address 0x28 + * else + * mmr wrt or read address 0x158 + * + * do desired mmr access (rd or wrt) + * + * if(mmr address == 0x100) + * mmr wrt or read address 0x38 + * mmr wrt or read address 0xb050 + * } else + * do desired mmr access + * + * According to hw, we can use reads instead of writes to the above addres + * + * Note this WAR can only to be used for accessing internal MMR's in the + * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the + * "Local CE Registers and Memories" and "PCI Compatible Config Space" address + * spaces from table 2-1 of the "CE Programmer's Reference Overview" document. + * + * All registers defined in struct tioce will meet that criteria. + */ + +static void inline +tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr) +{ + u64 mmr_base; + u64 mmr_offset; + + if (kern->ce_common->ce_rev != TIOCE_REV_A) + return; + + mmr_base = kern->ce_common->ce_pcibus.bs_base; + mmr_offset = (u64)mmr_addr - mmr_base; + + if (mmr_offset < 0x45000) { + u64 mmr_war_offset; + + if (mmr_offset == 0 || mmr_offset == 0x80) + mmr_war_offset = 0xc0; + else if (mmr_offset == 0x148 || mmr_offset == 0x200) + mmr_war_offset = 0x28; + else + mmr_war_offset = 0x158; + + readq_relaxed((void *)(mmr_base + mmr_war_offset)); + } +} + +static void inline +tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) +{ + u64 mmr_base; + u64 mmr_offset; + + if (kern->ce_common->ce_rev != TIOCE_REV_A) + return; + + mmr_base = kern->ce_common->ce_pcibus.bs_base; + mmr_offset = (u64)mmr_addr - mmr_base; + + if (mmr_offset < 0x45000) { + if (mmr_offset == 0x100) + readq_relaxed((void *)(mmr_base + 0x38)); + readq_relaxed((void *)(mmr_base + 0xb050)); + } +} + +/* load mmr contents into a variable */ +#define tioce_mmr_load(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + *(varp) = readq_relaxed(mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store variable contents into mmr */ +#define tioce_mmr_store(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(*varp, mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store immediate value into mmr */ +#define tioce_mmr_storei(kern, mmrp, val) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(val, mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* set bits (immediate value) into mmr */ +#define tioce_mmr_seti(kern, mmrp, bits) do {\ + u64 tmp; \ + tioce_mmr_load(kern, mmrp, &tmp); \ + tmp |= (bits); \ + tioce_mmr_store(kern, mmrp, &tmp); \ +} while (0) + +/* clear bits (immediate value) into mmr */ +#define tioce_mmr_clri(kern, mmrp, bits) do { \ + u64 tmp; \ + tioce_mmr_load(kern, mmrp, &tmp); \ + tmp &= ~(bits); \ + tioce_mmr_store(kern, mmrp, &tmp); \ +} while (0) /** * Bus address ranges for the 5 flavors of TIOCE DMA @@ -62,9 +180,9 @@ #define TIOCE_ATE_M40 2 #define TIOCE_ATE_M40S 3 -#define KB(x) ((x) << 10) -#define MB(x) ((x) << 20) -#define GB(x) ((x) << 30) +#define KB(x) ((u64)(x) << 10) +#define MB(x) ((u64)(x) << 20) +#define GB(x) ((u64)(x) << 30) /** * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode @@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_ int last; int entries; int nates; - int pagesize; + u64 pagesize; u64 *ate_shadow; u64 *ate_reg; u64 addr; @@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_ ate = ATE_MAKE(addr, pagesize); ate_shadow[i + j] = ate; - writeq(ate, &ate_reg[i + j]); + tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); addr += pagesize; } @@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 u64 tmp; ce_kern->ce_port[port].dirmap_shadow = ct_upper; - writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + ct_upper); tmp = ce_mmr->ce_ure_dir_map[port]; dma_ok = 1; } else @@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dm if (TIOCE_D32_ADDR(bus_addr)) { if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { ce_kern->ce_port[port].dirmap_shadow = 0; - writeq(0, &ce_mmr->ce_ure_dir_map[port]); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + 0); } } else { struct tioce_dmamap *map; @@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dm } else if (--map->refcnt == 0) { for (i = 0; i < map->ate_count; i++) { map->ate_shadow[i] = 0; - map->ate_hw[i] = 0; + tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0); } list_del(&map->ce_dmamap_list); @@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u spin_unlock_irqrestore(&ce_kern->ce_lock, flags); dma_map_done: - if (mapaddr & barrier) + if (mapaddr && barrier) mapaddr = tioce_dma_barrier(mapaddr, 1); return mapaddr; @@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void * soft->ce_pcibus.bs_persist_segment, soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); + if (ret_stuff.v0) + panic("tioce_error_intr_handler: Fatal TIOCE error"); + return IRQ_HANDLED; } /** + * tioce_reserve_m32 - reserve M32 ate's for the indicated address range + * @tioce_kernel: TIOCE context to reserve ate's for + * @base: starting bus address to reserve + * @limit: last bus address to reserve + * + * If base/limit falls within the range of bus space mapped through the + * M32 space, reserve the resources corresponding to the range. + */ +static void +tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) +{ + int ate_index, last_ate, ps; + struct tioce *ce_mmr; + + if (!TIOCE_M32_ADDR(base)) + return; + + ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; + ps = ce_kern->ce_ate3240_pagesize; + ate_index = ATE_PAGE(base, ps); + last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1; + + if (ate_index < 64) + ate_index = 64; + + while (ate_index <= last_ate) { + u64 ate; + + ate = ATE_MAKE(0xdeadbeef, ps); + ce_kern->ce_ate3240_shadow[ate_index] = ate; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], + ate); + ate_index++; + } +} + +/** * tioce_kern_init - init kernel structures related to a given TIOCE * @tioce_common: ptr to a cached tioce_common struct that originated in prom - */ static struct tioce_kernel * + */ +static struct tioce_kernel * tioce_kern_init(struct tioce_common *tioce_common) { int i; + int ps; + int dev; u32 tmp; + unsigned int seg, bus; struct tioce *tioce_mmr; struct tioce_kernel *tioce_kern; @@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tio * here to use pci_read_config_xxx() so use the raw_pci_ops vector. */ - raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment, - tioce_common->ce_pcibus.bs_persist_busnum, - PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp); + seg = tioce_common->ce_pcibus.bs_persist_segment; + bus = tioce_common->ce_pcibus.bs_persist_busnum; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp); tioce_kern->ce_port1_secondary = (u8) tmp; /* @@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tio */ tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; - __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK); - __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE); - tioce_kern->ce_ate3240_pagesize = KB(256); + tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_PAGESIZE_MASK); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_256K_PAGESIZE); + ps = tioce_kern->ce_ate3240_pagesize = KB(256); for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { tioce_kern->ce_ate40_shadow[i] = 0; - writeq(0, &tioce_mmr->ce_ure_ate40[i]); + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0); } for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { tioce_kern->ce_ate3240_shadow[i] = 0; - writeq(0, &tioce_mmr->ce_ure_ate3240[i]); + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0); + } + + /* + * Reserve ATE's corresponding to reserved address ranges. These + * include: + * + * Memory space covered by each PPB mem base/limit register + * Memory space covered by each PPB prefetch base/limit register + * + * These bus ranges are for pio (downstream) traffic only, and so + * cannot be used for DMA. + */ + + for (dev = 1; dev <= 2; dev++) { + u64 base, limit; + + /* mem base/limit */ + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_BASE, 2, &tmp); + base = (u64)tmp << 16; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_LIMIT, 2, &tmp); + limit = (u64)tmp << 16; + limit |= 0xfffffUL; + + if (base < limit) + tioce_reserve_m32(tioce_kern, base, limit); + + /* + * prefetch mem base/limit. The tioce ppb's have 64-bit + * decoders, so read the upper portions w/o checking the + * attributes. + */ + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_BASE, 2, &tmp); + base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_BASE_UPPER32, 4, &tmp); + base |= (u64)tmp << 32; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_LIMIT, 2, &tmp); + + limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + limit |= 0xfffffUL; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_LIMIT_UPPER32, 4, &tmp); + limit |= (u64)tmp << 32; + + if ((base < limit) && TIOCE_M32_ADDR(base)) + tioce_reserve_m32(tioce_kern, base, limit); } return tioce_kern; @@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info { struct pcidev_info *pcidev_info; struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; struct tioce *ce_mmr; u64 force_int_val; @@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; + + /* + * TIOCE Rev A workaround (PV 945826), force an interrupt by writing + * the TIO_INTx register directly (1/26/2006) + */ + if (ce_common->ce_rev == TIOCE_REV_A) { + u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit); + u64 status; + + tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status); + if (status & int_bit_mask) { + u64 force_irq = (1 << 8) | sn_irq_info->irq_irq; + u64 ctalk = sn_irq_info->irq_xtalkaddr; + u64 nasid, offset; + + nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT; + offset = (ctalk & CTALK_NODE_OFFSET); + HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq); + } + + return; + } /* * irq_int_bit is originally set up by prom, and holds the interrupt @@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info default: return; } - writeq(force_int_val, &ce_mmr->ce_adm_force_int); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val); } /** @@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_inf { struct pcidev_info *pcidev_info; struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; struct tioce *ce_mmr; int bit; u64 vector; @@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_inf ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; bit = sn_irq_info->irq_int_bit; - __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); + tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; vector |= sn_irq_info->irq_xtalkaddr; - writeq(vector, &ce_mmr->ce_adm_int_dest[bit]); - __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector); + tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); tioce_force_interrupt(sn_irq_info); } @@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_inf static void * tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) { + int my_nasid; + cnodeid_t my_cnode, mem_cnode; struct tioce_common *tioce_common; + struct tioce_kernel *tioce_kern; + struct tioce *tioce_mmr; /* * Allocate kernel bus soft and copy from prom. @@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *p memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; - if (tioce_kern_init(tioce_common) == NULL) { + tioce_kern = tioce_kern_init(tioce_common); + if (tioce_kern == NULL) { kfree(tioce_common); return NULL; } + /* + * Clear out any transient errors before registering the error + * interrupt handler. + */ + + tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, + ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL); + if (request_irq(SGI_PCIASIC_ERROR, tioce_error_intr_handler, SA_SHIRQ, "TIOCE error", (void *)tioce_common)) @@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *p tioce_common->ce_pcibus.bs_persist_segment, tioce_common->ce_pcibus.bs_persist_busnum); + /* + * identify closest nasid for memory allocations + */ + + my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base); + my_cnode = nasid_to_cnodeid(my_nasid); + + if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) { + printk(KERN_WARNING "tioce_bus_fixup: failed to find " + "closest node with MEM to TIO node %d\n", my_cnode); + mem_cnode = (cnodeid_t)-1; /* use any node */ + } + + controller->node = mem_cnode; + return tioce_common; } diff --git a/drivers/char/snsc.h b/drivers/char/snsc.h index a9efc13..8a98169 100644 --- a/drivers/char/snsc.h +++ b/drivers/char/snsc.h @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. */ /* @@ -70,6 +70,9 @@ struct sysctl_data_s { #define EV_CLASS_TEST_WARNING 0x6000ul #define EV_CLASS_PWRD_NOTIFY 0x8000ul +/* ENV class codes */ +#define ENV_PWRDN_PEND 0x4101ul + #define EV_SEVERITY_POWER_STABLE 0x0000ul #define EV_SEVERITY_POWER_LOW_WARNING 0x0100ul #define EV_SEVERITY_POWER_HIGH_WARNING 0x0200ul diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index baaa365..a4fa507 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. */ /* @@ -187,7 +187,8 @@ scdrv_event_severity(int code) static void scdrv_dispatch_event(char *event, int len) { - int code, esp_code, src; + static int snsc_shutting_down = 0; + int code, esp_code, src, class; char desc[CHUNKSIZE]; char *severity; @@ -199,9 +200,25 @@ scdrv_dispatch_event(char *event, int le /* how urgent is the message? */ severity = scdrv_event_severity(code); - if ((code & EV_CLASS_MASK) == EV_CLASS_PWRD_NOTIFY) { + class = (code & EV_CLASS_MASK); + + if (class == EV_CLASS_PWRD_NOTIFY || code == ENV_PWRDN_PEND) { struct task_struct *p; + if (snsc_shutting_down) + return; + + snsc_shutting_down = 1; + + /* give a message for each type of event */ + if (class == EV_CLASS_PWRD_NOTIFY) + printk(KERN_NOTICE "Power off indication received." + " Sending SIGPWR to init...\n"); + else if (code == ENV_PWRDN_PEND) + printk(KERN_CRIT "WARNING: Shutting down the system" + " due to a critical environmental condition." + " Sending SIGPWR to init...\n"); + /* give a SIGPWR signal to init proc */ /* first find init's task */ @@ -210,12 +227,11 @@ scdrv_dispatch_event(char *event, int le if (p->pid == 1) break; } - if (p) { /* we found init's task */ - printk(KERN_EMERG "Power off indication received. Initiating power fail sequence...\n"); + if (p) { force_sig(SIGPWR, p); - } else { /* failed to find init's task - just give message(s) */ - printk(KERN_WARNING "Failed to find init proc to handle power off!\n"); - printk("%s|$(0x%x)%s\n", severity, esp_code, desc); + } else { + printk(KERN_ERR "Failed to signal init!\n"); + snsc_shutting_down = 0; /* so can try again (?) */ } read_unlock(&tasklist_lock); } else { diff --git a/drivers/sn/ioc4.c b/drivers/sn/ioc4.c index ea75b3d..67140a5 100644 --- a/drivers/sn/ioc4.c +++ b/drivers/sn/ioc4.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,11 +54,10 @@ * Submodule management * ************************/ -static LIST_HEAD(ioc4_devices); -static DECLARE_RWSEM(ioc4_devices_rwsem); +static DEFINE_MUTEX(ioc4_mutex); +static LIST_HEAD(ioc4_devices); static LIST_HEAD(ioc4_submodules); -static DECLARE_RWSEM(ioc4_submodules_rwsem); /* Register an IOC4 submodule */ int @@ -66,15 +65,13 @@ ioc4_register_submodule(struct ioc4_subm { struct ioc4_driver_data *idd; - down_write(&ioc4_submodules_rwsem); + mutex_lock(&ioc4_mutex); list_add(&is->is_list, &ioc4_submodules); - up_write(&ioc4_submodules_rwsem); /* Initialize submodule for each IOC4 */ if (!is->is_probe) - return 0; + goto out; - down_read(&ioc4_devices_rwsem); list_for_each_entry(idd, &ioc4_devices, idd_list) { if (is->is_probe(idd)) { printk(KERN_WARNING @@ -84,8 +81,8 @@ ioc4_register_submodule(struct ioc4_subm pci_name(idd->idd_pdev)); } } - up_read(&ioc4_devices_rwsem); - + out: + mutex_unlock(&ioc4_mutex); return 0; } @@ -95,15 +92,13 @@ ioc4_unregister_submodule(struct ioc4_su { struct ioc4_driver_data *idd; - down_write(&ioc4_submodules_rwsem); + mutex_lock(&ioc4_mutex); list_del(&is->is_list); - up_write(&ioc4_submodules_rwsem); /* Remove submodule for each IOC4 */ if (!is->is_remove) - return; + goto out; - down_read(&ioc4_devices_rwsem); list_for_each_entry(idd, &ioc4_devices, idd_list) { if (is->is_remove(idd)) { printk(KERN_WARNING @@ -113,7 +108,8 @@ ioc4_unregister_submodule(struct ioc4_su pci_name(idd->idd_pdev)); } } - up_read(&ioc4_devices_rwsem); + out: + mutex_unlock(&ioc4_mutex); } /********************* @@ -312,12 +308,11 @@ ioc4_probe(struct pci_dev *pdev, const s /* Track PCI-device specific data */ idd->idd_serial_data = NULL; pci_set_drvdata(idd->idd_pdev, idd); - down_write(&ioc4_devices_rwsem); + + mutex_lock(&ioc4_mutex); list_add(&idd->idd_list, &ioc4_devices); - up_write(&ioc4_devices_rwsem); /* Add this IOC4 to all submodules */ - down_read(&ioc4_submodules_rwsem); list_for_each_entry(is, &ioc4_submodules, is_list) { if (is->is_probe && is->is_probe(idd)) { printk(KERN_WARNING @@ -327,7 +322,7 @@ ioc4_probe(struct pci_dev *pdev, const s pci_name(idd->idd_pdev)); } } - up_read(&ioc4_submodules_rwsem); + mutex_unlock(&ioc4_mutex); return 0; @@ -351,7 +346,7 @@ ioc4_remove(struct pci_dev *pdev) idd = pci_get_drvdata(pdev); /* Remove this IOC4 from all submodules */ - down_read(&ioc4_submodules_rwsem); + mutex_lock(&ioc4_mutex); list_for_each_entry(is, &ioc4_submodules, is_list) { if (is->is_remove && is->is_remove(idd)) { printk(KERN_WARNING @@ -361,7 +356,7 @@ ioc4_remove(struct pci_dev *pdev) pci_name(idd->idd_pdev)); } } - up_read(&ioc4_submodules_rwsem); + mutex_unlock(&ioc4_mutex); /* Release resources */ iounmap(idd->idd_misc_regs); @@ -377,9 +372,9 @@ ioc4_remove(struct pci_dev *pdev) pci_disable_device(pdev); /* Remove and free driver data */ - down_write(&ioc4_devices_rwsem); + mutex_lock(&ioc4_mutex); list_del(&idd->idd_list); - up_write(&ioc4_devices_rwsem); + mutex_unlock(&ioc4_mutex); kfree(idd); } diff --git a/include/asm-ia64/intel_intrin.h b/include/asm-ia64/intel_intrin.h index a7122d8..d069b6a 100644 --- a/include/asm-ia64/intel_intrin.h +++ b/include/asm-ia64/intel_intrin.h @@ -5,113 +5,10 @@ * * Copyright (C) 2002,2003 Jun Nakajima * Copyright (C) 2002,2003 Suresh Siddha + * Copyright (C) 2005,2006 Hongjiu Lu * */ -#include - -void __lfetch(int lfhint, void *y); -void __lfetch_excl(int lfhint, void *y); -void __lfetch_fault(int lfhint, void *y); -void __lfetch_fault_excl(int lfhint, void *y); - -/* In the following, whichFloatReg should be an integer from 0-127 */ -void __ldfs(const int whichFloatReg, void *src); -void __ldfd(const int whichFloatReg, void *src); -void __ldfe(const int whichFloatReg, void *src); -void __ldf8(const int whichFloatReg, void *src); -void __ldf_fill(const int whichFloatReg, void *src); -void __stfs(void *dst, const int whichFloatReg); -void __stfd(void *dst, const int whichFloatReg); -void __stfe(void *dst, const int whichFloatReg); -void __stf8(void *dst, const int whichFloatReg); -void __stf_spill(void *dst, const int whichFloatReg); - -void __st1_rel(void *dst, const __s8 value); -void __st2_rel(void *dst, const __s16 value); -void __st4_rel(void *dst, const __s32 value); -void __st8_rel(void *dst, const __s64 value); -__u8 __ld1_acq(void *src); -__u16 __ld2_acq(void *src); -__u32 __ld4_acq(void *src); -__u64 __ld8_acq(void *src); - -__u64 __fetchadd4_acq(__u32 *addend, const int increment); -__u64 __fetchadd4_rel(__u32 *addend, const int increment); -__u64 __fetchadd8_acq(__u64 *addend, const int increment); -__u64 __fetchadd8_rel(__u64 *addend, const int increment); - -__u64 __getf_exp(double d); - -/* OS Related Itanium(R) Intrinsics */ - -/* The names to use for whichReg and whichIndReg below come from - the include file asm/ia64regs.h */ - -__u64 __getIndReg(const int whichIndReg, __s64 index); -__u64 __getReg(const int whichReg); - -void __setIndReg(const int whichIndReg, __s64 index, __u64 value); -void __setReg(const int whichReg, __u64 value); - -void __mf(void); -void __mfa(void); -void __synci(void); -void __itcd(__s64 pa); -void __itci(__s64 pa); -void __itrd(__s64 whichTransReg, __s64 pa); -void __itri(__s64 whichTransReg, __s64 pa); -void __ptce(__s64 va); -void __ptcl(__s64 va, __s64 pagesz); -void __ptcg(__s64 va, __s64 pagesz); -void __ptcga(__s64 va, __s64 pagesz); -void __ptri(__s64 va, __s64 pagesz); -void __ptrd(__s64 va, __s64 pagesz); -void __invala (void); -void __invala_gr(const int whichGeneralReg /* 0-127 */ ); -void __invala_fr(const int whichFloatReg /* 0-127 */ ); -void __nop(const int); -void __fc(__u64 *addr); -void __sum(int mask); -void __rum(int mask); -void __ssm(int mask); -void __rsm(int mask); -__u64 __thash(__s64); -__u64 __ttag(__s64); -__s64 __tpa(__s64); - -/* Intrinsics for implementing get/put_user macros */ -void __st_user(const char *tableName, __u64 addr, char size, char relocType, __u64 val); -void __ld_user(const char *tableName, __u64 addr, char size, char relocType); - -/* This intrinsic does not generate code, it creates a barrier across which - * the compiler will not schedule data access instructions. - */ -void __memory_barrier(void); - -void __isrlz(void); -void __dsrlz(void); - -__u64 _m64_mux1(__u64 a, const int n); -__u64 __thash(__u64); - -/* Lock and Atomic Operation Related Intrinsics */ -__u64 _InterlockedExchange8(volatile __u8 *trgt, __u8 value); -__u64 _InterlockedExchange16(volatile __u16 *trgt, __u16 value); -__s64 _InterlockedExchange(volatile __u32 *trgt, __u32 value); -__s64 _InterlockedExchange64(volatile __u64 *trgt, __u64 value); - -__u64 _InterlockedCompareExchange8_rel(volatile __u8 *dest, __u64 xchg, __u64 comp); -__u64 _InterlockedCompareExchange8_acq(volatile __u8 *dest, __u64 xchg, __u64 comp); -__u64 _InterlockedCompareExchange16_rel(volatile __u16 *dest, __u64 xchg, __u64 comp); -__u64 _InterlockedCompareExchange16_acq(volatile __u16 *dest, __u64 xchg, __u64 comp); -__u64 _InterlockedCompareExchange_rel(volatile __u32 *dest, __u64 xchg, __u64 comp); -__u64 _InterlockedCompareExchange_acq(volatile __u32 *dest, __u64 xchg, __u64 comp); -__u64 _InterlockedCompareExchange64_rel(volatile __u64 *dest, __u64 xchg, __u64 comp); -__u64 _InterlockedCompareExchange64_acq(volatile __u64 *dest, __u64 xchg, __u64 comp); - -__s64 _m64_dep_mi(const int v, __s64 s, const int p, const int len); -__s64 _m64_shrp(__s64 a, __s64 b, const int count); -__s64 _m64_popcnt(__s64 a); +#include #define ia64_barrier() __memory_barrier() @@ -122,15 +19,16 @@ __s64 _m64_popcnt(__s64 a); #define ia64_getreg __getReg #define ia64_setreg __setReg -#define ia64_hint(x) +#define ia64_hint __hint +#define ia64_hint_pause __hint_pause -#define ia64_mux1_brcst 0 -#define ia64_mux1_mix 8 -#define ia64_mux1_shuf 9 -#define ia64_mux1_alt 10 -#define ia64_mux1_rev 11 +#define ia64_mux1_brcst _m64_mux1_brcst +#define ia64_mux1_mix _m64_mux1_mix +#define ia64_mux1_shuf _m64_mux1_shuf +#define ia64_mux1_alt _m64_mux1_alt +#define ia64_mux1_rev _m64_mux1_rev -#define ia64_mux1 _m64_mux1 +#define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v))) #define ia64_popcnt _m64_popcnt #define ia64_getf_exp __getf_exp #define ia64_shrp _m64_shrp @@ -158,7 +56,7 @@ __s64 _m64_popcnt(__s64 a); #define ia64_stf8 __stf8 #define ia64_stf_spill __stf_spill -#define ia64_mf __mf +#define ia64_mf __mf #define ia64_mfa __mfa #define ia64_fetchadd4_acq __fetchadd4_acq @@ -234,10 +132,10 @@ __s64 _m64_popcnt(__s64 a); /* Values for lfhint in __lfetch and __lfetch_fault */ -#define ia64_lfhint_none 0 -#define ia64_lfhint_nt1 1 -#define ia64_lfhint_nt2 2 -#define ia64_lfhint_nta 3 +#define ia64_lfhint_none __lfhint_none +#define ia64_lfhint_nt1 __lfhint_nt1 +#define ia64_lfhint_nt2 __lfhint_nt2 +#define ia64_lfhint_nta __lfhint_nta #define ia64_lfetch __lfetch #define ia64_lfetch_excl __lfetch_excl @@ -254,4 +152,6 @@ do { \ } \ } while (0) +#define __builtin_trap() __break(0); + #endif /* _ASM_IA64_INTEL_INTRIN_H */ diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index ca5ea99..c3e4ed8 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -20,6 +20,7 @@ struct scatterlist; struct page; struct mm_struct; struct pci_bus; +struct task_struct; typedef void ia64_mv_setup_t (char **); typedef void ia64_mv_cpu_init_t (void); @@ -34,6 +35,7 @@ typedef int ia64_mv_pci_legacy_read_t (s u8 size); typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val, u8 size); +typedef void ia64_mv_migrate_t(struct task_struct * task); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); @@ -85,6 +87,11 @@ machvec_noop_mm (struct mm_struct *mm) { } +static inline void +machvec_noop_task (struct task_struct *task) +{ +} + extern void machvec_setup (char **); extern void machvec_timer_interrupt (int, void *, struct pt_regs *); extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); @@ -146,6 +153,7 @@ extern void machvec_tlb_migrate_finish ( # define platform_readw_relaxed ia64_mv.readw_relaxed # define platform_readl_relaxed ia64_mv.readl_relaxed # define platform_readq_relaxed ia64_mv.readq_relaxed +# define platform_migrate ia64_mv.migrate # endif /* __attribute__((__aligned__(16))) is required to make size of the @@ -194,6 +202,7 @@ struct ia64_machine_vector { ia64_mv_readw_relaxed_t *readw_relaxed; ia64_mv_readl_relaxed_t *readl_relaxed; ia64_mv_readq_relaxed_t *readq_relaxed; + ia64_mv_migrate_t *migrate; } __attribute__((__aligned__(16))); /* align attrib? see above comment */ #define MACHVEC_INIT(name) \ @@ -238,6 +247,7 @@ struct ia64_machine_vector { platform_readw_relaxed, \ platform_readl_relaxed, \ platform_readq_relaxed, \ + platform_migrate, \ } extern struct ia64_machine_vector ia64_mv; @@ -386,5 +396,8 @@ extern ia64_mv_dma_supported swiotlb_dm #ifndef platform_readq_relaxed # define platform_readq_relaxed __ia64_readq_relaxed #endif +#ifndef platform_migrate +# define platform_migrate machvec_noop_task +#endif #endif /* _ASM_IA64_MACHVEC_H */ diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index 03d00fa..da1d437 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -66,6 +66,7 @@ extern ia64_mv_dma_sync_single_for_devic extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; extern ia64_mv_dma_mapping_error sn_dma_mapping_error; extern ia64_mv_dma_supported sn_dma_supported; +extern ia64_mv_migrate_t sn_migrate; /* * This stuff has dual use! @@ -115,6 +116,7 @@ extern ia64_mv_dma_supported sn_dma_sup #define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device #define platform_dma_mapping_error sn_dma_mapping_error #define platform_dma_supported sn_dma_supported +#define platform_migrate sn_migrate #include diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h index c7d9c9e..bfbbb8d 100644 --- a/include/asm-ia64/mca.h +++ b/include/asm-ia64/mca.h @@ -131,6 +131,8 @@ struct ia64_mca_cpu { /* Array of physical addresses of each CPU's MCA area. */ extern unsigned long __per_cpu_mca[NR_CPUS]; +extern int cpe_vector; +extern int ia64_cpe_irq; extern void ia64_mca_init(void); extern void ia64_mca_cpu_init(void *); extern void ia64_os_mca_dispatch(void); diff --git a/include/asm-ia64/mutex.h b/include/asm-ia64/mutex.h index 458c1f7..5a3224f 100644 --- a/include/asm-ia64/mutex.h +++ b/include/asm-ia64/mutex.h @@ -1,9 +1,92 @@ /* - * Pull in the generic implementation for the mutex fastpath. + * ia64 implementation of the mutex fastpath. * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) + * Copyright (C) 2006 Ken Chen + * + */ + +#ifndef _ASM_MUTEX_H +#define _ASM_MUTEX_H + +/** + * __mutex_fastpath_lock - try to take the lock by moving the count + * from 1 to a 0 value + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 1 + * + * Change the count from 1 to a value lower than 1, and call if + * it wasn't 1 originally. This function MUST leave the value lower than + * 1 even when the "1" assertion wasn't true. + */ +static inline void +__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) +{ + if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) + fail_fn(count); +} + +/** + * __mutex_fastpath_lock_retval - try to take the lock by moving the count + * from 1 to a 0 value + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 1 + * + * Change the count from 1 to a value lower than 1, and call if + * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, + * or anything the slow path function returns. + */ +static inline int +__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) +{ + if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) + return fail_fn(count); + return 0; +} + +/** + * __mutex_fastpath_unlock - try to promote the count from 0 to 1 + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 0 + * + * Try to promote the count from 0 to 1. If it wasn't 0, call . + * In the failure case, this function is allowed to either set the value to + * 1, or to set it to a value lower than 1. + * + * If the implementation sets it to a value of lower than 1, then the + * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs + * to return 0 otherwise. + */ +static inline void +__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) +{ + int ret = ia64_fetchadd4_rel(count, 1); + if (unlikely(ret < 0)) + fail_fn(count); +} + +#define __mutex_slowpath_needs_to_unlock() 1 + +/** + * __mutex_fastpath_trylock - try to acquire the mutex, without waiting + * + * @count: pointer of type atomic_t + * @fail_fn: fallback function + * + * Change the count from 1 to a value lower than 1, and return 0 (failure) + * if it wasn't 1 originally, or return 1 (success) otherwise. This function + * MUST leave the value lower than 1 even when the "1" assertion wasn't true. + * Additionally, if the value was < 0 originally, this function must not leave + * it to 0 on failure. + * + * If the architecture has no effective trylock variant, it should call the + * spinlock-based trylock variant unconditionally. */ +static inline int +__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) +{ + if (likely(cmpxchg_acq(count, 1, 0)) == 1) + return 1; + return 0; +} -#include +#endif diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 23c8e1b..128fefd 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -50,7 +50,8 @@ #define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ - /* bit 5 is currently unused */ +#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration + sync at ctx sw */ #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ #define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ diff --git a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h index 608168d..5e328ed 100644 --- a/include/asm-ia64/signal.h +++ b/include/asm-ia64/signal.h @@ -158,8 +158,6 @@ struct k_sigaction { #define ptrace_signal_deliver(regs, cookie) do { } while (0) -void set_sigdelayed(pid_t pid, int signo, int code, void __user *addr); - #endif /* __KERNEL__ */ # endif /* !__ASSEMBLY__ */ diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h index 2c32e4b..1d9efe5 100644 --- a/include/asm-ia64/sn/addrs.h +++ b/include/asm-ia64/sn/addrs.h @@ -283,5 +283,13 @@ #define REMOTE_HUB_L(n, a) HUB_L(REMOTE_HUB_ADDR((n), (a))) #define REMOTE_HUB_S(n, a, d) HUB_S(REMOTE_HUB_ADDR((n), (a)), (d)) +/* + * Coretalk address breakdown + */ +#define CTALK_NASID_SHFT 40 +#define CTALK_NASID_MASK (0x3FFFULL << CTALK_NASID_SHFT) +#define CTALK_CID_SHFT 38 +#define CTALK_CID_MASK (0x3ULL << CTALK_CID_SHFT) +#define CTALK_NODE_OFFSET 0x3FFFFFFFFF #endif /* _ASM_IA64_SN_ADDRS_H */ diff --git a/include/asm-ia64/sn/rw_mmr.h b/include/asm-ia64/sn/rw_mmr.h index f40fd1a..2d78f4c 100644 --- a/include/asm-ia64/sn/rw_mmr.h +++ b/include/asm-ia64/sn/rw_mmr.h @@ -3,15 +3,14 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 2002-2006 Silicon Graphics, Inc. All Rights Reserved. */ #ifndef _ASM_IA64_SN_RW_MMR_H #define _ASM_IA64_SN_RW_MMR_H /* - * This file contains macros used to access MMR registers via - * uncached physical addresses. + * This file that access MMRs via uncached physical addresses. * pio_phys_read_mmr - read an MMR * pio_phys_write_mmr - write an MMR * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 @@ -22,53 +21,8 @@ */ -extern inline long -pio_phys_read_mmr(volatile long *mmr) -{ - long val; - asm volatile - ("mov r2=psr;;" - "rsm psr.i | psr.dt;;" - "srlz.i;;" - "ld8.acq %0=[%1];;" - "mov psr.l=r2;;" - "srlz.i;;" - : "=r"(val) - : "r"(mmr) - : "r2"); - return val; -} - - - -extern inline void -pio_phys_write_mmr(volatile long *mmr, long val) -{ - asm volatile - ("mov r2=psr;;" - "rsm psr.i | psr.dt;;" - "srlz.i;;" - "st8.rel [%0]=%1;;" - "mov psr.l=r2;;" - "srlz.i;;" - :: "r"(mmr), "r"(val) - : "r2", "memory"); -} - -extern inline void -pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2) -{ - asm volatile - ("mov r2=psr;;" - "rsm psr.i | psr.dt | psr.ic;;" - "cmp.ne p9,p0=%2,r0;" - "srlz.i;;" - "st8.rel [%0]=%1;" - "(p9) st8.rel [%2]=%3;;" - "mov psr.l=r2;;" - "srlz.i;;" - :: "r"(mmr1), "r"(val1), "r"(mmr2), "r"(val2) - : "p9", "r2", "memory"); -} +extern long pio_phys_read_mmr(volatile long *mmr); +extern void pio_phys_write_mmr(volatile long *mmr, long val); +extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2); #endif /* _ASM_IA64_SN_RW_MMR_H */ diff --git a/include/asm-ia64/sn/tioce.h b/include/asm-ia64/sn/tioce.h index d4c9907..893468e 100644 --- a/include/asm-ia64/sn/tioce.h +++ b/include/asm-ia64/sn/tioce.h @@ -11,7 +11,7 @@ /* CE ASIC part & mfgr information */ #define TIOCE_PART_NUM 0xCE00 -#define TIOCE_MFGR_NUM 0x36 +#define TIOCE_SRC_ID 0x01 #define TIOCE_REV_A 0x1 /* CE Virtual PPB Vendor/Device IDs */ @@ -20,7 +20,7 @@ /* CE Host Bridge Vendor/Device IDs */ #define CE_HOST_BRIDGE_VENDOR_ID 0x10a9 -#define CE_HOST_BRIDGE_DEVICE_ID 0x4003 +#define CE_HOST_BRIDGE_DEVICE_ID 0x4001 #define TIOCE_NUM_M40_ATES 4096 @@ -463,6 +463,25 @@ typedef volatile struct tioce { u64 ce_end_of_struct; /* 0x044400 */ } tioce_t; +/* ce_lsiX_gb_cfg1 register bit masks & shifts */ +#define CE_LSI_GB_CFG1_RXL0S_THS_SHFT 0 +#define CE_LSI_GB_CFG1_RXL0S_THS_MASK (0xffULL << 0) +#define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT 8 +#define CE_LSI_GB_CFG1_RXL0S_SMP_MASK (0xfULL << 8); +#define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT 12 +#define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK (0x7ULL << 12) +#define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT 15 +#define CE_LSI_GB_CFG1_RXL0S_FLT_MASK (0x1ULL << 15) +#define CE_LSI_GB_CFG1_LPBK_SEL_SHFT 16 +#define CE_LSI_GB_CFG1_LPBK_SEL_MASK (0x3ULL << 16) +#define CE_LSI_GB_CFG1_LPBK_EN_SHFT 18 +#define CE_LSI_GB_CFG1_LPBK_EN_MASK (0x1ULL << 18) +#define CE_LSI_GB_CFG1_RVRS_LB_SHFT 19 +#define CE_LSI_GB_CFG1_RVRS_LB_MASK (0x1ULL << 19) +#define CE_LSI_GB_CFG1_RVRS_CLK_SHFT 20 +#define CE_LSI_GB_CFG1_RVRS_CLK_MASK (0x3ULL << 20) +#define CE_LSI_GB_CFG1_SLF_TS_SHFT 24 +#define CE_LSI_GB_CFG1_SLF_TS_MASK (0xfULL << 24) /* ce_adm_int_mask/ce_adm_int_status register bit defines */ #define CE_ADM_INT_CE_ERROR_SHFT 0 @@ -592,6 +611,11 @@ typedef volatile struct tioce { #define CE_URE_RD_MRG_ENABLE (0x1ULL << 0) #define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4) #define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5) +#define CE_URE_WRT_MRG_TIMER_SHFT 12 +#define CE_URE_WRT_MRG_TIMER_MASK (0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT) +#define CE_URE_WRT_MRG_TIMER(x) (((u64)(x) << \ + CE_URE_WRT_MRG_TIMER_SHFT) & \ + CE_URE_WRT_MRG_TIMER_MASK) #define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24) #define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32) #define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33) @@ -653,8 +677,12 @@ typedef volatile struct tioce { #define CE_URE_SI (0x1ULL << 0) #define CE_URE_ELAL_SHFT 4 #define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT) +#define CE_URE_ELAL_SET(n) (((u64)(n) << CE_URE_ELAL_SHFT) & \ + CE_URE_ELAL_MASK) #define CE_URE_ELAL1_SHFT 8 #define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT) +#define CE_URE_ELAL1_SET(n) (((u64)(n) << CE_URE_ELAL1_SHFT) & \ + CE_URE_ELAL1_MASK) #define CE_URE_SCC (0x1ULL << 12) #define CE_URE_PN1_SHFT 16 #define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT) @@ -675,8 +703,12 @@ typedef volatile struct tioce { #define CE_URE_HPC (0x1ULL << 6) #define CE_URE_SPLV_SHFT 7 #define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT) +#define CE_URE_SPLV_SET(n) (((u64)(n) << CE_URE_SPLV_SHFT) & \ + CE_URE_SPLV_MASK) #define CE_URE_SPLS_SHFT 15 #define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT) +#define CE_URE_SPLS_SET(n) (((u64)(n) << CE_URE_SPLS_SHFT) & \ + CE_URE_SPLS_MASK) #define CE_URE_PSN1_SHFT 19 #define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT) #define CE_URE_PSN2_SHFT 32 diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 0625387..cd4233d 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -244,6 +244,13 @@ extern void ia64_load_extra (struct task __ia64_save_fpu((prev)->thread.fph); \ } \ __switch_to(prev, next, last); \ + /* "next" in old context is "current" in new context */ \ + if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \ + (task_cpu(current) != \ + task_thread_info(current)->last_cpu))) { \ + platform_migrate(current); \ + task_thread_info(current)->last_cpu = task_cpu(current); \ + } \ } while (0) #else # define switch_to(prev,next,last) __switch_to(prev, next, last) diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 1d6518f..56394a2 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -26,16 +26,10 @@ struct thread_info { struct exec_domain *exec_domain;/* execution domain */ __u32 flags; /* thread_info flags (see TIF_*) */ __u32 cpu; /* current CPU */ + __u32 last_cpu; /* Last CPU thread ran on */ mm_segment_t addr_limit; /* user-level address space limit */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; - struct { - int signo; - int code; - void __user *addr; - unsigned long start_time; - pid_t pid; - } sigdelayed; /* Saved information for TIF_SIGDELAYED */ }; #define THREAD_SIZE KERNEL_STACK_SIZE @@ -89,7 +83,6 @@ struct thread_info { #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ -#define TIF_SIGDELAYED 5 /* signal delayed from MCA/INIT/NMI/PMI context */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ @@ -101,13 +94,12 @@ struct thread_info { #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SIGDELAYED (1 << TIF_SIGDELAYED) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) /* "work to do on user-return" bits */ -#define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED) +#define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))