GIT aa92b7e0584efc587e9693c2d1a9444416d692b9 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6.git#test commit 40eee8ab3441cc4d4fcb7cfbdbd339e7b583f846 Author: Khalid Aziz Date: Thu Oct 12 13:19:45 2006 -0600 [IA64] Fix compile failure with CONFIG_KEXEC but not CONFIG_CRASH_DUMP 2.6.18 kernel patched with kexec/kdump patch from Tony's test tree fails to compile if CONFIG_KEXEC is turned on but CONFIG_CRASH_DUMP is not. Following patch fixes this. Signed-off-by: Khalid Aziz Acked-by: Simon Horman Signed-off-by: Tony Luck commit e1983ff22209e0de0595abe9b61d30078ef3ccb9 Author: Michal Piotrowski Date: Tue Oct 10 14:25:29 2006 -0700 [IA64] kill #include "linux/config.h" config.h is obsolete. This patch removes all #include "linux/config.h". (specifically the one in arch/ia64/kernel/relocate_kernel.S) Signed-off-by: Michal Piotrowski Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Tony Luck commit dc52454505ea87f0e6d4dab534acdd29ef33e7df Author: Zou Nan hai Date: Mon Oct 2 13:51:30 2006 -0700 [IA64] Kexec/Kdump for Itanium Distillation of work largely by Zou Nan hai (who gets Author credit because 'git' only seems to support one author) but also inspired by Khalid Aziz, Simon Horman, and a few others whom I'm too lazy to look up in the mailing list archives, but whose support I appreciate. 
Signed-off-by: Tony Luck arch/ia64/Kconfig | 23 ++ arch/ia64/kernel/Makefile | 1 arch/ia64/kernel/crash.c | 238 ++++++++++++++++++ arch/ia64/kernel/efi.c | 65 +++++ arch/ia64/kernel/entry.S | 2 arch/ia64/kernel/iosapic.c | 22 ++ arch/ia64/kernel/machine_kexec.c | 138 ++++++++++ arch/ia64/kernel/relocate_kernel.S | 488 ++++++++++++++++++++++++++++++++++++ arch/ia64/kernel/setup.c | 37 +++ arch/ia64/kernel/smp.c | 56 ++++ include/asm-ia64/kexec.h | 53 ++++ include/asm-ia64/meminit.h | 3 include/asm-ia64/smp.h | 3 include/linux/kexec.h | 4 include/linux/sysctl.h | 1 kernel/kexec.c | 1 16 files changed, 1129 insertions(+), 6 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 70f7eb9..56c0a0f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -433,6 +433,29 @@ config IA64_ESI source "drivers/sn/Kconfig" +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL && !IA64_HP_SIM + help + kexec is a system call that implements the ability to shut down your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shut down, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM + help + Generate crash dump after being started by kexec. 
+ source "drivers/firmware/Kconfig" source "fs/Kconfig.binfmt" diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index cfa099b..8ae384e 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI_MSI) += msi_ia64.o diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c new file mode 100644 index 0000000..c48519e --- /dev/null +++ b/arch/ia64/kernel/crash.c @@ -0,0 +1,238 @@ +/* + * arch/ia64/kernel/crash.c + * + * Architecture specific (ia64) functions for kexec based crash dumps. + * + * Created by: Khalid Aziz + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Intel Corp Zou Nan hai + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +int kdump_status[NR_CPUS]; +atomic_t kdump_cpu_freezed; +int kdump_on_init = 1; + +ssize_t +copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + vaddr = __va(pfn<n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += (sizeof(*note) + 3)/4; + memcpy(buf, name, note->n_namesz); + buf += (note->n_namesz + 3)/4; + memcpy(buf, data, data_len); + buf += (data_len + 3)/4; + return buf; +} + +static void +final_note(void *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +extern void ia64_dump_cpu_regs(void *); + +static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); + +void +crash_save_this_cpu() +{ + void *buf; + unsigned long cfm, sof, sol; + + int cpu = smp_processor_id(); + struct elf_prstatus *prstatus = 
&per_cpu(elf_prstatus, cpu); + + elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg); + memset(prstatus, 0, sizeof(*prstatus)); + prstatus->pr_pid = current->pid; + + ia64_dump_cpu_regs(dst); + cfm = dst[43]; + sol = (cfm >> 7) & 0x7f; + sof = cfm & 0x7f; + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46], + sof - sol); + + buf = (u64 *) per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus, + sizeof(*prstatus)); + final_note(buf); +} + +static int +kdump_wait_cpu_freeze(void) +{ + int cpu_num = num_online_cpus() - 1; + int timeout = 1000; + while(timeout-- > 0) { + if (atomic_read(&kdump_cpu_freezed) == cpu_num) + return 0; + udelay(1000); + } + return 1; +} + +static int kdump_sending_init; + +void +machine_crash_shutdown(struct pt_regs *pt) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. 
+ */ + kexec_disable_iosapic(); +#ifdef CONFIG_SMP + kdump_smp_send_stop(); + if (kdump_wait_cpu_freeze() && kdump_on_init) { + /* not all cpu response to IPI, send INIT to freeze them */ + kdump_sending_init = 1; + mb(); + kdump_smp_send_init(); + } +#endif +} + +static void +machine_kdump_on_init(void) +{ + local_irq_disable(); + kexec_disable_iosapic(); + machine_kexec(ia64_kimage); +} + +void +kdump_cpu_freeze(struct unw_frame_info *info, void *arg) +{ + local_irq_disable(); + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + atomic_inc(&kdump_cpu_freezed); + kdump_status[smp_processor_id()] = 1; + mb(); + for (;;) + cpu_relax(); +} + +static int +kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) +{ + struct ia64_mca_notify_die *nd; + struct die_args *args = data; + + if (!kdump_on_init) + return NOTIFY_DONE; + + if (val != DIE_INIT_MONARCH_ENTER && val != DIE_INIT_SLAVE_ENTER) + return NOTIFY_DONE; + + nd = (struct ia64_mca_notify_die *)args->err; + /* Reason code 1 means machine check rendezous*/ + if (nd->sos->rv_rc == 1) + return NOTIFY_DONE; + + if (kdump_sending_init) + unw_init_running(kdump_cpu_freeze, NULL); + + switch (val) { + case DIE_INIT_MONARCH_ENTER: + machine_kdump_on_init(); + break; + case DIE_INIT_SLAVE_ENTER: + unw_init_running(kdump_cpu_freeze, NULL); + break; + } + return NOTIFY_DONE; +} + +#ifdef CONFIG_SYSCTL +static ctl_table kdump_on_init_table[] = { + { + .ctl_name = KERN_KDUMP_ON_INIT, + .procname = "kdump_on_init", + .data = &kdump_on_init, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table sys_table[] = { + { + .ctl_name = CTL_KERN, + .procname = "kernel", + .mode = 0555, + .child = kdump_on_init_table, + }, + { .ctl_name = 0 } +}; +#endif + +static int +machine_crash_setup(void) +{ + char *from = strstr(saved_command_line, "elfcorehdr="); + static struct notifier_block kdump_init_notifier_nb = { + 
.notifier_call = kdump_init_notifier, + }; + int ret; +#ifdef CONFIG_PROC_VMCORE + if (from) + elfcorehdr_addr = memparse(from+11, &from); +#endif +#ifdef CONFIG_CRASH_DUMP + saved_max_pfn = (unsigned long)-1; +#endif + if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) + return ret; +#ifdef CONFIG_SYSCTL + register_sysctl_table(sys_table, 0); +#endif + return 0; +} + +__initcall(machine_crash_setup); + diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index bb8770a..a01ba73 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -26,6 +26,7 @@ #include #include #include #include +#include #include #include @@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void struct efi efi; EXPORT_SYMBOL(efi); static efi_runtime_services_t *runtime; -static unsigned long mem_limit = ~0UL, max_addr = ~0UL; +static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; #define efi_call_virt(f, args...) (*(f))(args) @@ -421,6 +422,8 @@ efi_init (void) mem_limit = memparse(cp + 4, &cp); } else if (memcmp(cp, "max_addr=", 9) == 0) { max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else if (memcmp(cp, "min_addr=", 9) == 0) { + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); } else { while (*cp != ' ' && *cp) ++cp; @@ -428,6 +431,8 @@ efi_init (void) ++cp; } } + if (min_addr != 0UL) + printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20); if (max_addr != ~0UL) printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20); @@ -894,7 +899,8 @@ find_memmap_space (void) as = max(contig_low, md->phys_addr); ae = min(contig_high, efi_md_end(md)); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsign } else ae = efi_md_end(md); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg 
*/ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct re */ insert_resource(res, code_resource); insert_resource(res, data_resource); +#ifdef CONFIG_KEXEC + insert_resource(res, &efi_memmap_res); + insert_resource(res, &boot_param_res); + if (crashk_res.end > crashk_res.start) + insert_resource(res, &crashk_res); +#endif } } } + +#ifdef CONFIG_KEXEC +/* find a block of memory aligned to 64M exclude reserved regions + * rsvd_regions are sorted + */ +unsigned long +kdump_find_rsvd_region(unsigned long size, struct rsvd_region *r, int n) +{ + int i; + u64 start, end; + u64 alignment = 1UL << _PAGE_SIZE_64M; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (!efi_wb(md)) + continue; + start = ALIGN(md->phys_addr, alignment); + end = efi_md_end(md); + for (i = 0; i < n; i++) { + if (__pa(r[i].start) >= start && __pa(r[i].end) < end) { + if (__pa(r[i].start) > start + size) + return start; + start = ALIGN(__pa(r[i].end), alignment); + if (i < n - 1 + && __pa(r[i + 1].start) < start + size) + continue; + else + break; + } + } + if (end > start + size) + return start; + } + + printk(KERN_WARNING + "Cannot reserve 0x%lx byte of memory for crashdump\n", size); + return ~0UL; +} +#endif diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3390b7c..15234ed 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1575,7 +1575,7 @@ sys_call_table: data8 sys_mq_timedreceive // 1265 data8 sys_mq_notify data8 sys_mq_getsetattr - data8 sys_ni_syscall // reserved for kexec_load + data8 sys_kexec_load data8 sys_ni_syscall // reserved for vserver data8 sys_waitid // 1270 data8 
sys_add_key diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 9bf15fe..00cf506 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -288,6 +288,28 @@ nop (unsigned int irq) /* do nothing... */ } + +#ifdef CONFIG_KEXEC +void +kexec_disable_iosapic(void) +{ + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + ia64_vector vec = 0; + + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { + list_for_each_entry(rte, &info->rtes, + rte_list) { + iosapic_write(rte->addr, + IOSAPIC_RTE_LOW(rte->rte_index), + IOSAPIC_MASK); + iosapic_eoi(rte->addr, vec); + } + } +} +#endif + static void mask_irq (unsigned int irq) { diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c new file mode 100644 index 0000000..2bbdabe --- /dev/null +++ b/arch/ia64/kernel/machine_kexec.c @@ -0,0 +1,138 @@ +/* + * arch/ia64/kernel/machine_kexec.c + * + * Handle transition of Linux booting another kernel + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz + * Copyright (C) 2006 Intel Corp, Zou Nan hai + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long, + struct ia64_boot_param *, unsigned long); + +struct kimage *ia64_kimage; + +struct resource efi_memmap_res = { + .name = "EFI Memory Map", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource boot_param_res = { + .name = "Boot parameter", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + + +/* + * Do whatever setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. 
+ */ +int machine_kexec_prepare(struct kimage *image) +{ + void *control_code_buffer; + const unsigned long *func; + + func = (unsigned long *)&relocate_new_kernel; + /* Pre-load control code buffer to minimize work in kexec path */ + control_code_buffer = page_address(image->control_code_page); + memcpy((void *)control_code_buffer, (const void *)func[0], + relocate_new_kernel_size); + flush_icache_range((unsigned long)control_code_buffer, + (unsigned long)control_code_buffer + relocate_new_kernel_size); + ia64_kimage = image; + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +void machine_shutdown(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + { + int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + cpu_down(cpu); + } + } +#elif defined(CONFIG_SMP) + smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0); +#endif + kexec_disable_iosapic(); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. 
+ */ +extern void *efi_get_pal_addr(void); +static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) +{ + struct kimage *image = arg; + relocate_new_kernel_t rnk; + void *pal_addr = efi_get_pal_addr(); + unsigned long code_addr = (unsigned long)page_address(image->control_code_page); + unsigned long vector; + int ii; + + if (image->type == KEXEC_TYPE_CRASH) { + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + } + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Mask CMC and Performance Monitor interrupts */ + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* Mask ITV and Local Redirect Registers */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + + /* terminate possible nested in-service interrupts */ + for (ii = 0; ii < 16; ii++) + ia64_eoi(); + + /* unmask TPR and clear any pending interrupts */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + ia64_srlz_d(); + vector = ia64_get_ivr(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + ia64_eoi(); + vector = ia64_get_ivr(); + } + rnk = (relocate_new_kernel_t)&code_addr; + (*rnk)(image->head, image->start, ia64_boot_param, + GRANULEROUNDDOWN((unsigned long) pal_addr)); + BUG(); +} + +void machine_kexec(struct kimage *image) +{ + unw_init_running(ia64_machine_kexec, image); + for(;;); +} diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S new file mode 100644 index 0000000..bd6ab16 --- /dev/null +++ b/arch/ia64/kernel/relocate_kernel.S @@ -0,0 +1,488 @@ +/* + * arch/ia64/kernel/relocate_kernel.S + * + * Relocate kexec'able kernel and start it + * + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz + * Copyright (C) 2005 Intel Corp, Zou Nan hai + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ +#include +#include +#include +#include +#include + +/* + * Must be relocatable PIC code callable as a C function +*/ +GLOBAL_ENTRY(relocate_new_kernel) + .prologue + alloc r31=ar.pfs,4,0,0,0 + .body +.reloc_entry: +{ + rsm psr.i| psr.ic + mov r2=ip +} + ;; +{ + flushrs // must be first insn in group + srlz.i +} + ;; + dep r2=0,r2,61,3 // to physical address + ;; + //first switch to physical mode + add r3=1f-.reloc_entry, r2 + movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC + mov ar.rsc=0 // put RSE in enforced lazy mode + ;; + add sp=(memory_stack_end - 16 - .reloc_entry),r2 + add r8=(register_stack - .reloc_entry),r2 + ;; + mov r18=ar.rnat + mov ar.bspstore=r8 + ;; + mov cr.ipsr=r16 + mov cr.iip=r3 + mov cr.ifs=r0 + srlz.i + ;; + mov ar.rnat=r18 + rfi + ;; +1: + //physical mode code begin + mov b6=in1 + dep r28=0,in2,61,3 //to physical address + + // purge all TC entries +#define O(member) IA64_CPUINFO_##member##_OFFSET + GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=r0 + ;; + adds r20=-1,r20 + ;; +#undef O +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i + ;; + //purge TR entry for kernel text and data + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + + // purge TR entry for percpu data + movl r16=PERCPU_ADDR + mov r18=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.d + ;; + + // purge TR entry for pal code + mov r16=in3 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + + // purge TR entry for 
stack + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + //copy segments + movl r16=PAGE_MASK + mov r30=in0 // in0 is page_list + br.sptk.few .dest_page + ;; +.loop: + ld8 r30=[in0], 8;; +.dest_page: + tbit.z p0, p6=r30, 0;; // 0x1 dest page +(p6) and r17=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 1;; // 0x2 indirect page +(p6) and in0=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 2;; // 0x4 end flag +(p6) br.cond.sptk.few .end_loop;; + + tbit.z p6, p0=r30, 3;; // 0x8 source page +(p6) br.cond.sptk.few .loop + + and r18=r30, r16 + + // simple copy page, may optimize later + movl r14=PAGE_SIZE/8 - 1;; + mov ar.lc=r14;; +1: + ld8 r14=[r18], 8;; + st8 [r17]=r14, 8;; + fc.i r17 + br.ctop.sptk.few 1b + br.sptk.few .loop + ;; + +.end_loop: + sync.i // for fc.i + ;; + srlz.i + ;; + srlz.d + ;; + br.call.sptk.many b0=b6;; + +.align 32 +memory_stack: + .fill 8192, 1, 0 +memory_stack_end: +register_stack: + .fill 8192, 1, 0 +register_stack_end: +relocate_new_kernel_end: +END(relocate_new_kernel) + +GLOBAL_ENTRY(kexec_fake_sal_rendez) + .prologue + alloc r31=ar.pfs,3,0,0,0 + .body +.rendez_entry: + rsm psr.i | psr.ic + mov r25=ip + ;; + { + flushrs + srlz.i + } + ;; + /* See where I am running, and compute gp */ + { + mov ar.rsc = 0 /* Put RSE in enforce lacy, LE mode */ + mov gp = ip /* gp == relocate_new_kernel */ + } + + movl r8=0x00000100000000 + ;; + mov cr.iva=r8 + /* Transition from virtual to physical mode */ + srlz.i + ;; + add r17=5f-.rendez_entry, r25 + movl r16=(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_IC | IA64_PSR_MFL) + ;; + tpa r17=r17 + mov cr.ipsr=r16 + ;; + mov cr.iip=r17 + mov cr.ifs=r0 + ;; + rfi + ;; +5: + mov b6=in0 /* _start addr */ + mov r8=in1 /* ap_wakeup_vector */ + mov r26=in2 /* PAL addr */ + ;; + /* Purge kernel TRs */ + movl r16=KERNEL_START + mov 
r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16,r18 + ptr.d r16,r18 + ;; + srlz.i + ;; + srlz.d + ;; + /* Purge percpu TR */ + movl r16=PERCPU_ADDR + mov r18=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.d + ;; + /* Purge PAL TR */ + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r26,r18 + ;; + srlz.i + ;; + /* Purge stack TR */ + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + /* Ensure we can read and clear external interrupts */ + mov cr.tpr=r0 + srlz.d + + shr.u r9=r8,6 /* which irr */ + ;; + and r8=63,r8 /* bit offset into irr */ + ;; + mov r10=1;; + ;; + shl r10=r10,r8 /* bit mask off irr we want */ + cmp.eq p6,p0=0,r9 + ;; +(p6) br.cond.sptk.few check_irr0 + cmp.eq p7,p0=1,r9 + ;; +(p7) br.cond.sptk.few check_irr1 + cmp.eq p8,p0=2,r9 + ;; +(p8) br.cond.sptk.few check_irr2 + cmp.eq p9,p0=3,r9 + ;; +(p9) br.cond.sptk.few check_irr3 + +check_irr0: + mov r8=cr.irr0 + ;; + and r8=r8,r10 + ;; + cmp.eq p6,p0=0,r8 +(p6) br.cond.sptk.few check_irr0 + br.few call_start + +check_irr1: + mov r8=cr.irr1 + ;; + and r8=r8,r10 + ;; + cmp.eq p6,p0=0,r8 +(p6) br.cond.sptk.few check_irr1 + br.few call_start + +check_irr2: + mov r8=cr.irr2 + ;; + and r8=r8,r10 + ;; + cmp.eq p6,p0=0,r8 +(p6) br.cond.sptk.few check_irr2 + br.few call_start + +check_irr3: + mov r8=cr.irr3 + ;; + and r8=r8,r10 + ;; + cmp.eq p6,p0=0,r8 +(p6) br.cond.sptk.few check_irr3 + br.few call_start + +call_start: + mov cr.eoi=r0 + ;; + srlz.d + ;; + mov r8=cr.ivr + ;; + srlz.d + ;; + cmp.eq p0,p6=15,r8 +(p6) br.cond.sptk.few call_start + br.sptk.few b6 +kexec_fake_sal_rendez_end: +END(kexec_fake_sal_rendez) + + .global relocate_new_kernel_size +relocate_new_kernel_size: + data8 kexec_fake_sal_rendez_end - relocate_new_kernel + +GLOBAL_ENTRY(ia64_dump_cpu_regs) + .prologue + alloc loc0=ar.pfs,1,2,0,0 + .body + mov ar.rsc=0 // put RSE in enforced lazy mode + add 
loc1=4*8, in0 // save r4 and r5 first + ;; +{ + flushrs // flush dirty regs to backing store + srlz.i +} + st8 [loc1]=r4, 8 + ;; + st8 [loc1]=r5, 8 + ;; + add loc1=32*8, in0 + mov r4=ar.rnat + ;; + st8 [in0]=r0, 8 // r0 + st8 [loc1]=r4, 8 // rnat + mov r5=pr + ;; + st8 [in0]=r1, 8 // r1 + st8 [loc1]=r5, 8 // pr + mov r4=b0 + ;; + st8 [in0]=r2, 8 // r2 + st8 [loc1]=r4, 8 // b0 + mov r5=b1; + ;; + st8 [in0]=r3, 24 // r3 + st8 [loc1]=r5, 8 // b1 + mov r4=b2 + ;; + st8 [in0]=r6, 8 // r6 + st8 [loc1]=r4, 8 // b2 + mov r5=b3 + ;; + st8 [in0]=r7, 8 // r7 + st8 [loc1]=r5, 8 // b3 + mov r4=b4 + ;; + st8 [in0]=r8, 8 // r8 + st8 [loc1]=r4, 8 // b4 + mov r5=b5 + ;; + st8 [in0]=r9, 8 // r9 + st8 [loc1]=r5, 8 // b5 + mov r4=b6 + ;; + st8 [in0]=r10, 8 // r10 + st8 [loc1]=r5, 8 // b6 + mov r5=b7 + ;; + st8 [in0]=r11, 8 // r11 + st8 [loc1]=r5, 8 // b7 + mov r4=b0 + ;; + st8 [in0]=r12, 8 // r12 + st8 [loc1]=r4, 8 // ip + mov r5=loc0 + ;; + st8 [in0]=r13, 8 // r13 + extr.u r5=r5, 0, 38 // ar.pfs.pfm + mov r4=r0 // user mask + ;; + st8 [in0]=r14, 8 // r14 + st8 [loc1]=r5, 8 // cfm + ;; + st8 [in0]=r15, 8 // r15 + st8 [loc1]=r4, 8 // user mask + mov r5=ar.rsc + ;; + st8 [in0]=r16, 8 // r16 + st8 [loc1]=r5, 8 // ar.rsc + mov r4=ar.bsp + ;; + st8 [in0]=r17, 8 // r17 + st8 [loc1]=r4, 8 // ar.bsp + mov r5=ar.bspstore + ;; + st8 [in0]=r18, 8 // r18 + st8 [loc1]=r5, 8 // ar.bspstore + mov r4=ar.rnat + ;; + st8 [in0]=r19, 8 // r19 + st8 [loc1]=r4, 8 // ar.rnat + mov r5=ar.ccv + ;; + st8 [in0]=r20, 8 // r20 + st8 [loc1]=r5, 8 // ar.ccv + mov r4=ar.unat + ;; + st8 [in0]=r21, 8 // r21 + st8 [loc1]=r4, 8 // ar.unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r22, 8 // r22 + st8 [loc1]=r5, 8 // ar.fpsr + mov r4 = ar.unat + ;; + st8 [in0]=r23, 8 // r23 + st8 [loc1]=r4, 8 // unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r24, 8 // r24 + st8 [loc1]=r5, 8 // fpsr + mov r4 = ar.pfs + ;; + st8 [in0]=r25, 8 // r25 + st8 [loc1]=r4, 8 // ar.pfs + mov r5 = ar.lc + ;; + st8 [in0]=r26, 8 // r26 + st8 [loc1]=r5, 8 // ar.lc 
+ mov r4 = ar.ec + ;; + st8 [in0]=r27, 8 // r27 + st8 [loc1]=r4, 8 // ar.ec + mov r5 = ar.csd + ;; + st8 [in0]=r28, 8 // r28 + st8 [loc1]=r5, 8 // ar.csd + mov r4 = ar.ssd + ;; + st8 [in0]=r29, 8 // r29 + st8 [loc1]=r4, 8 // ar.ssd + ;; + st8 [in0]=r30, 8 // r30 + ;; + st8 [in0]=r31, 8 // r31 + mov ar.pfs=loc0 + ;; + br.ret.sptk.many rp +END(ia64_dump_cpu_regs) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c4caa80..28751be 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -43,6 +43,8 @@ #include #include #include #include +#include +#include #include #include @@ -252,6 +254,41 @@ #endif efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); n++; +#ifdef CONFIG_KEXEC + /* crashkernel=size@offset specifies the size to reserve for a crash + * kernel.(offset is ingored for keep compatibility with other archs) + * By reserving this memory we guarantee that linux never set's it + * up as a DMA target.Useful for holding code to do something + * appropriate after a kernel panic. 
+ */ + { + char *from = strstr(saved_command_line, "crashkernel="); + unsigned long base, size; + if (from) { + size = memparse(from + 12, &from); + if (size) { + sort_regions(rsvd_region, n); + base = kdump_find_rsvd_region(size, + rsvd_region, n); + if (base != ~0UL) { + rsvd_region[n].start = + (unsigned long)__va(base); + rsvd_region[n].end = + (unsigned long)__va(base + size); + n++; + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } + } + efi_memmap_res.start = ia64_boot_param->efi_memmap; + efi_memmap_res.end = efi_memmap_res.start + + ia64_boot_param->efi_memmap_size; + boot_param_res.start = __pa(ia64_boot_param); + boot_param_res.end = boot_param_res.start + + sizeof(*ia64_boot_param); + } +#endif /* end of memory marker */ rsvd_region[n].start = ~0UL; rsvd_region[n].end = ~0UL; diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 657ac99..71ed6b5 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -30,6 +30,7 @@ #include #include #include #include +#include #include #include @@ -66,6 +67,7 @@ static volatile struct call_data_struct #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 +#define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. */ static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; @@ -84,6 +86,34 @@ unlock_ipi_calllock(void) spin_unlock_irq(&call_lock); } +#ifdef CONFIG_KEXEC +/* + * Stop the CPU and put it in fake SAL rendezvous. 
This allows CPU to wake + * up with IPI from boot processor + */ +void +kexec_stop_this_cpu (void *func) +{ + unsigned long pta, impl_va_bits, pal_base; + + /* + * Remove this CPU by putting it into fake SAL rendezvous + */ + cpu_clear(smp_processor_id(), cpu_online_map); + max_xtp(); + ia64_eoi(); + + /* Disable VHPT */ + impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); + pta = POW2(61) - POW2(vmlpt_bits); + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0); + + local_irq_disable(); + pal_base = __get_cpu_var(ia64_mca_pal_base); + kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base); +} +#endif + static void stop_this_cpu (void) { @@ -155,7 +185,11 @@ handle_IPI (int irq, void *dev_id, struc case IPI_CPU_STOP: stop_this_cpu(); break; - +#ifdef CONFIG_CRASH_DUMP + case IPI_KDUMP_CPU_STOP: + unw_init_running(kdump_cpu_freeze, NULL); + break; +#endif default: printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); break; @@ -213,6 +247,26 @@ send_IPI_self (int op) send_IPI_single(smp_processor_id(), op); } +#ifdef CONFIG_KEXEC +void +kdump_smp_send_stop(void) +{ + send_IPI_allbutself(IPI_KDUMP_CPU_STOP); +} + +void +kdump_smp_send_init(void) +{ + unsigned int cpu, self_cpu; + self_cpu = smp_processor_id(); + for_each_online_cpu(cpu) { + if (cpu != self_cpu) { + if(kdump_status[cpu] == 0) + platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0); + } + } +} +#endif /* * Called with preeemption disabled. 
*/ diff --git a/include/asm-ia64/kexec.h b/include/asm-ia64/kexec.h new file mode 100644 index 0000000..7eb8ca2 --- /dev/null +++ b/include/asm-ia64/kexec.h @@ -0,0 +1,53 @@ +#ifndef _ASM_IA64_KEXEC_H +#define _ASM_IA64_KEXEC_H + + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096) + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_IA_64 + +#define MAX_NOTE_BYTES 1024 + +#define pte_bits 3 +#define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) +#define POW2(n) (1ULL << (n)) + +#define kexec_flush_icache_page(page) do { \ + unsigned long page_addr = (unsigned long)page_address(page); \ + flush_icache_range(page_addr, page_addr + PAGE_SIZE); \ + } while(0) + +extern struct kimage *ia64_kimage; +DECLARE_PER_CPU(u64, ia64_mca_pal_base); +const extern unsigned int relocate_new_kernel_size; +volatile extern long kexec_rendez; +extern void relocate_new_kernel(unsigned long, unsigned long, + struct ia64_boot_param *, unsigned long); +extern void kexec_fake_sal_rendez(void *start, unsigned long wake_up, + unsigned long pal_base); +static inline void +crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) +{ +} +extern struct resource efi_memmap_res; +extern struct resource boot_param_res; +extern void kdump_smp_send_stop(void); +extern void kdump_smp_send_init(void); +extern void kexec_disable_iosapic(void); +extern void crash_save_this_cpu(void); +struct rsvd_region; +extern unsigned long kdump_find_rsvd_region(unsigned long size, + struct rsvd_region *rsvd_regions, int n); +extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg); +extern int kdump_status[]; +extern atomic_t kdump_cpu_freezed; + +#endif /* 
_ASM_IA64_KEXEC_H */ diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h index c3b1f86..c8df759 100644 --- a/include/asm-ia64/meminit.h +++ b/include/asm-ia64/meminit.h @@ -15,11 +15,12 @@ #define meminit_h * - initrd (optional) * - command line string * - kernel code & data + * - crash dumping code reserved region * - Kernel memory map built from EFI memory map * * More could be added if necessary */ -#define IA64_MAX_RSVD_REGIONS 6 +#define IA64_MAX_RSVD_REGIONS 7 struct rsvd_region { unsigned long start; /* virtual address of beginning of element */ diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index 60fd4ae..8beb83b 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h @@ -127,6 +127,9 @@ extern void lock_ipi_calllock(void); extern void unlock_ipi_calllock(void); extern void identify_siblings (struct cpuinfo_ia64 *); extern int is_multithreading_enabled(void); +#ifdef CONFIG_KEXEC +extern void kexec_stop_this_cpu(void *); +#endif #else diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 6427949..c790e08 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -108,6 +108,10 @@ int kexec_should_crash(struct task_struc extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; +#ifndef kexec_flush_icache_page +#define kexec_flush_icache_page(page) +#endif + #define KEXEC_ON_CRASH 0x00000001 #define KEXEC_ARCH_MASK 0xffff0000 diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1b24bd4..1749cd7 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -152,6 +152,7 @@ enum KERN_MAX_LOCK_DEPTH=74, KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ + KERN_KDUMP_ON_INIT=77, /* int: ia64 kdump with INIT */ }; diff --git a/kernel/kexec.c b/kernel/kexec.c index fcdd5d2..05aada2 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -851,6 +851,7 @@ static int 
kimage_load_crash_segment(str memset(ptr + uchunk, 0, mchunk - uchunk); } result = copy_from_user(ptr, buf, uchunk); + kexec_flush_icache_page(page); kunmap(page); if (result) { result = (result < 0) ? result : -EIO;