===== Makefile 1.550 vs edited ===== --- 1.550/Makefile 2004-12-03 12:56:59 -08:00 +++ edited/Makefile 2005-01-04 10:12:53 -08:00 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 10 -EXTRAVERSION =-rc3 +EXTRAVERSION =-rc3-kexec NAME=Woozy Numbat # *DOCUMENTATION* ===== arch/ia64/Kconfig 1.82 vs edited ===== --- 1.82/arch/ia64/Kconfig 2004-09-30 11:49:13 -07:00 +++ edited/arch/ia64/Kconfig 2004-12-15 18:18:36 -08:00 @@ -291,6 +291,23 @@ little bigger and slows down execution a bit, but it is generally a good idea to turn this on. If you're unsure, say Y. +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. 
+ config IA64_PALINFO tristate "/proc/pal support" help ===== arch/ia64/kernel/Makefile 1.36 vs edited ===== --- 1.36/arch/ia64/kernel/Makefile 2004-10-18 22:26:50 -07:00 +++ edited/arch/ia64/kernel/Makefile 2004-12-15 18:18:58 -08:00 @@ -17,6 +17,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o mca_recovery-y += mca_drv.o mca_drv_asm.o ===== arch/ia64/kernel/efi.c 1.39 vs edited ===== --- 1.39/arch/ia64/kernel/efi.c 2004-10-26 16:43:15 -07:00 +++ edited/arch/ia64/kernel/efi.c 2004-12-15 18:18:36 -08:00 @@ -17,6 +17,12 @@ * * Goutham Rao: * Skip non-WB memory and ignore empty memory ranges. + * + * Rewrote efi_memap_walk() to create a linked list of available + * memory regions instead of editing EFI memory map in place + * - Khalid Aziz + * + * Added initial support for kexec - Khalid Aziz */ #include #include @@ -28,7 +34,6 @@ #include #include -#include #include #include #include @@ -38,26 +43,30 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; +#ifdef CONFIG_KEXEC +unsigned long kexec_reboot = 0; +#endif EXPORT_SYMBOL(efi); static efi_runtime_services_t *runtime; static unsigned long mem_limit = ~0UL, max_addr = ~0UL; +kern_memdesc_t *memdesc_area, *memdesc_end; +kern_memdesc_t *kern_memmap = NULL; +unsigned long efi_total_mem = 0; #define efi_call_virt(f, args...) 
(*(f))(args) -#define STUB_GET_TIME(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_time_cap_t *atc = NULL; \ - efi_status_t ret; \ - \ - if (tc) \ - atc = adjust_arg(tc); \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ +#define STUB_GET_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), \ + adjust_arg(tc)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ } #define STUB_SET_TIME(prefix, adjust_arg) \ @@ -92,14 +101,11 @@ prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \ { \ struct ia64_fpreg fr[6]; \ - efi_time_t *atm = NULL; \ efi_status_t ret; \ \ - if (tm) \ - atm = adjust_arg(tm); \ ia64_save_scratch_fpregs(fr); \ ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \ - enabled, atm); \ + enabled, adjust_arg(tm)); \ ia64_load_scratch_fpregs(fr); \ return ret; \ } @@ -110,14 +116,11 @@ unsigned long *data_size, void *data) \ { \ struct ia64_fpreg fr[6]; \ - u32 *aattr = NULL; \ efi_status_t ret; \ \ - if (attr) \ - aattr = adjust_arg(attr); \ ia64_save_scratch_fpregs(fr); \ ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \ - adjust_arg(name), adjust_arg(vendor), aattr, \ + adjust_arg(name), adjust_arg(vendor), adjust_arg(attr), \ adjust_arg(data_size), adjust_arg(data)); \ ia64_load_scratch_fpregs(fr); \ return ret; \ @@ -173,41 +176,33 @@ unsigned long data_size, efi_char16_t *data) \ { \ struct ia64_fpreg fr[6]; \ - efi_char16_t *adata = NULL; \ - \ - if (data) \ - adata = adjust_arg(data); \ \ ia64_save_scratch_fpregs(fr); \ 
efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \ - reset_type, status, data_size, adata); \ + reset_type, status, data_size, adjust_arg(data)); \ /* should not return, but just in case... */ \ ia64_load_scratch_fpregs(fr); \ } -#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg)) - -STUB_GET_TIME(phys, phys_ptr) -STUB_SET_TIME(phys, phys_ptr) -STUB_GET_WAKEUP_TIME(phys, phys_ptr) -STUB_SET_WAKEUP_TIME(phys, phys_ptr) -STUB_GET_VARIABLE(phys, phys_ptr) -STUB_GET_NEXT_VARIABLE(phys, phys_ptr) -STUB_SET_VARIABLE(phys, phys_ptr) -STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr) -STUB_RESET_SYSTEM(phys, phys_ptr) - -#define id(arg) arg - -STUB_GET_TIME(virt, id) -STUB_SET_TIME(virt, id) -STUB_GET_WAKEUP_TIME(virt, id) -STUB_SET_WAKEUP_TIME(virt, id) -STUB_GET_VARIABLE(virt, id) -STUB_GET_NEXT_VARIABLE(virt, id) -STUB_SET_VARIABLE(virt, id) -STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id) -STUB_RESET_SYSTEM(virt, id) +STUB_GET_TIME(phys, __pa) +STUB_SET_TIME(phys, __pa) +STUB_GET_WAKEUP_TIME(phys, __pa) +STUB_SET_WAKEUP_TIME(phys, __pa) +STUB_GET_VARIABLE(phys, __pa) +STUB_GET_NEXT_VARIABLE(phys, __pa) +STUB_SET_VARIABLE(phys, __pa) +STUB_GET_NEXT_HIGH_MONO_COUNT(phys, __pa) +STUB_RESET_SYSTEM(phys, __pa) + +STUB_GET_TIME(virt, ) +STUB_SET_TIME(virt, ) +STUB_GET_WAKEUP_TIME(virt, ) +STUB_SET_WAKEUP_TIME(virt, ) +STUB_GET_VARIABLE(virt, ) +STUB_GET_NEXT_VARIABLE(virt, ) +STUB_SET_VARIABLE(virt, ) +STUB_GET_NEXT_HIGH_MONO_COUNT(virt, ) +STUB_RESET_SYSTEM(virt, ) void efi_gettimeofday (struct timespec *ts) @@ -215,7 +210,7 @@ efi_time_t tm; memset(ts, 0, sizeof(ts)); - if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) + if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS) return; ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second); @@ -223,11 +218,8 @@ } static int -is_available_memory (efi_memory_desc_t *md) +is_kern_available_memory (kern_memdesc_t *md) { - if (!(md->attribute & EFI_MEMORY_WB)) - return 0; - switch (md->type) { case 
EFI_LOADER_CODE: case EFI_LOADER_DATA: @@ -245,37 +237,46 @@ * is being ignored. */ static void -trim_bottom (efi_memory_desc_t *md, u64 start_addr) +granule_align_bottom (kern_memdesc_t *md, u64 start_addr) { u64 num_skipped_pages; - if (md->phys_addr >= start_addr || !md->num_pages) + if (md->start >= start_addr || !md->num_pages) return; - num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT; + num_skipped_pages = (start_addr - md->start) >> EFI_PAGE_SHIFT; if (num_skipped_pages > md->num_pages) num_skipped_pages = md->num_pages; - if (is_available_memory(md)) - printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " - "at 0x%lx\n", __FUNCTION__, - (num_skipped_pages << EFI_PAGE_SHIFT) >> 10, - md->phys_addr, start_addr - IA64_GRANULE_SIZE); + if (is_kern_available_memory(md)) + printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at " + "0x%lx due to granule hole at 0x%lx\n", __FUNCTION__, + (num_skipped_pages << EFI_PAGE_SHIFT) >> 10, + md->start, start_addr - IA64_GRANULE_SIZE); /* - * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory - * descriptor list to become unsorted. In such a case, md->num_pages will be - * zero, so the Right Thing will happen. + * NOTE: Don't set md->start to start_addr because that could + * cause the memory descriptor list to become unsorted. In such + * a case, md->num_pages will be zero, so the Right Thing will happen. */ - md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT; + md->start += num_skipped_pages << EFI_PAGE_SHIFT; md->num_pages -= num_skipped_pages; + + /* + * NOTE: Since memory descriptors from EFI are normally aligned + * at EFI page size boundaries and granule size is multiple of + * EFI page size, we should end up with a nice granule + * aligned address for md->start. If these assumptions were + * to break in future, we will need to ensure md->start is + * granule aligned at this point. 
+ */ } static void -trim_top (efi_memory_desc_t *md, u64 end_addr) +granule_align_top (kern_memdesc_t *md, u64 end_addr) { u64 num_dropped_pages, md_end_addr; - md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + md_end_addr = md->start + (md->num_pages << EFI_PAGE_SHIFT); if (md_end_addr <= end_addr || !md->num_pages) return; @@ -284,128 +285,235 @@ if (num_dropped_pages > md->num_pages) num_dropped_pages = md->num_pages; - if (is_available_memory(md)) + if (is_kern_available_memory(md)) printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " "at 0x%lx\n", __FUNCTION__, (num_dropped_pages << EFI_PAGE_SHIFT) >> 10, - md->phys_addr, end_addr); + md->start, end_addr); md->num_pages -= num_dropped_pages; } +/* + * Allocate a node for kernel memory descriptor. These allocations are never + * freed. + */ +static kern_memdesc_t +*memdesc_alloc(void) +{ + if (memdesc_area >= memdesc_end) + return(NULL); + return(memdesc_area++); +} + /* - * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that - * has memory that is available for OS use. + * Walks the EFI memory map and calls CALLBACK once for each EFI + * memory descriptor that has memory that is available for OS use. 
*/ void efi_memmap_walk (efi_freemem_callback_t callback, void *arg) { - int prev_valid = 0; - struct range { - u64 start; - u64 end; - } prev, curr; + kern_memdesc_t *memnode; + u64 start, end; + + memnode= kern_memmap; + + while (memnode != NULL) { + if ((!is_kern_available_memory(memnode)) || (memnode->trimmed)) { + memnode = memnode->next; + continue; + } + start = PAGE_OFFSET + memnode->start; + end = (start + (memnode->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK; + + switch (memnode->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + if ((*callback)(start, end, arg) < 0) + return; + } + memnode = memnode->next; + } +} + +/* + * Trim memory size in accordance with "mem=" and "max_addr=" parameters + */ +void +efi_trim_memory(void) +{ + kern_memdesc_t *memnode, *newnode; + unsigned long total_mem = 0; + unsigned long new_pages; + + /* Do we have a memory size limit? If not, we are done */ + if ((mem_limit == ~0UL) && (max_addr == ~0UL)) + return; + + /* + * walk the memory map to find where does the limit kick in + */ + memnode = kern_memmap; + while (memnode != NULL) { + total_mem += memnode->num_pages << EFI_PAGE_SHIFT; + + if (memnode->start + (memnode->num_pages << EFI_PAGE_SHIFT) > max_addr) { + new_pages = (max_addr - memnode->start) >> EFI_PAGE_SHIFT; + break; + } else + if (total_mem > mem_limit) { + new_pages = memnode->num_pages - + ((total_mem - mem_limit) >> EFI_PAGE_SHIFT); + break; + } + memnode = memnode->next; + } + + /* Allocate a new node that will hold split range */ + if ((newnode = memdesc_alloc()) == NULL) { + printk("Failed to allocate space to split memory map for max_addr\n"); + return; + } + newnode->start = memnode->start + (new_pages << EFI_PAGE_SHIFT); + newnode->num_pages = memnode->num_pages - new_pages; + newnode->type = memnode->type; + newnode->trimmed = 1; + memnode->num_pages = new_pages; + newnode->prev = memnode; + 
newnode->next = memnode->next; + memnode->next = newnode; + newnode->next->prev = newnode; + + /* Now walk the rest of ranges and mark them trimmed */ + memnode = newnode->next; + while (memnode != NULL) { + memnode->trimmed = 1; + memnode = memnode->next; + } +} + +/* + * Walk the EFI memory map and gather all memory available for kernel + * to use. + */ +void +efi_gather_memory(void) +{ void *efi_map_start, *efi_map_end, *p, *q; efi_memory_desc_t *md, *check_md; - u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0; - unsigned long total_mem = 0; + u64 efi_desc_size, granule_addr, last_granule_addr, prev_end; + u64 first_non_wb_addr = 0; + int no_allocate = 0; + kern_memdesc_t *newnode, *prevnode; efi_map_start = __va(ia64_boot_param->efi_memmap); efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; efi_desc_size = ia64_boot_param->efi_memdesc_size; + prevnode = newnode = NULL; for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { md = p; - /* skip over non-WB memory descriptors; that's all we're interested in... */ + /* We only care about WB memory descriptors */ if (!(md->attribute & EFI_MEMORY_WB)) continue; + if (!no_allocate && (newnode = memdesc_alloc()) == NULL) { + printk("Failed to allocate node for kernel memory descriptor\n"); + machine_restart(NULL); + } + newnode->type = md->type; + newnode->trimmed = 0; + newnode->start = md->phys_addr; + newnode->num_pages = md->num_pages; + newnode->next = newnode->prev = NULL; + if (kern_memmap == NULL) + kern_memmap = newnode; + /* + * Do we need to granule align? * granule_addr is the base of md's first granule. - * [granule_addr - first_non_wb_addr) is guaranteed to + * (granule_addr - first_non_wb_addr) is guaranteed to * be contiguous WB memory. 
*/ - granule_addr = GRANULEROUNDDOWN(md->phys_addr); + granule_addr = GRANULEROUNDDOWN(newnode->start); first_non_wb_addr = max(first_non_wb_addr, granule_addr); - if (first_non_wb_addr < md->phys_addr) { - trim_bottom(md, granule_addr + IA64_GRANULE_SIZE); - granule_addr = GRANULEROUNDDOWN(md->phys_addr); + if (first_non_wb_addr < newnode->start) { + granule_align_bottom(newnode, granule_addr + IA64_GRANULE_SIZE); + granule_addr = GRANULEROUNDDOWN(newnode->start); first_non_wb_addr = max(first_non_wb_addr, granule_addr); } - for (q = p; q < efi_map_end; q += efi_desc_size) { + first_non_wb_addr += newnode->start << EFI_PAGE_SHIFT; + + for (q = p+efi_desc_size; q < efi_map_end; q += efi_desc_size) { check_md = q; if ((check_md->attribute & EFI_MEMORY_WB) && - (check_md->phys_addr == first_non_wb_addr)) + (check_md->phys_addr == first_non_wb_addr)) { first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT; + } else - break; /* non-WB or hole */ + break; /* non-WB, non-available or hole */ } last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr); - if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) - trim_top(md, last_granule_addr); + if (last_granule_addr < newnode->start + (newnode->num_pages << EFI_PAGE_SHIFT)) + granule_align_top(newnode, last_granule_addr); - if (is_available_memory(md)) { - if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) { - if (md->phys_addr >= max_addr) - continue; - md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT; - first_non_wb_addr = max_addr; - } - if (total_mem >= mem_limit) - continue; - - if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) { - unsigned long limit_addr = md->phys_addr; - - limit_addr += mem_limit - total_mem; - limit_addr = GRANULEROUNDDOWN(limit_addr); - - if (md->phys_addr > limit_addr) - continue; - - md->num_pages = (limit_addr - md->phys_addr) >> - EFI_PAGE_SHIFT; - first_non_wb_addr = max_addr = md->phys_addr + - (md->num_pages << 
EFI_PAGE_SHIFT); - } - total_mem += (md->num_pages << EFI_PAGE_SHIFT); + /* + * Are we left with any pages after all the alignment? + * If not, we will simply reuse the node we just allocated + * and not allocate a new one. + */ + if (!newnode->num_pages) { + no_allocate = 1; + continue; + } else + no_allocate = 0; - if (md->num_pages == 0) + /* + * Can we coalesce this memory range with previous one + */ + if (prevnode) { + prev_end = prevnode->start + (prevnode->num_pages << EFI_PAGE_SHIFT); + if ((prev_end == newnode->start) && + (is_kern_available_memory(newnode)) && + (is_kern_available_memory(prevnode))) { + prevnode->num_pages += newnode->num_pages; + no_allocate = 1; + efi_total_mem += (newnode->num_pages << EFI_PAGE_SHIFT); continue; - - curr.start = PAGE_OFFSET + md->phys_addr; - curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); - - if (!prev_valid) { - prev = curr; - prev_valid = 1; - } else { - if (curr.start < prev.start) - printk(KERN_ERR "Oops: EFI memory table not ordered!\n"); - - if (prev.end == curr.start) { - /* merge two consecutive memory ranges */ - prev.end = curr.end; - } else { - start = PAGE_ALIGN(prev.start); - end = prev.end & PAGE_MASK; - if ((end > start) && (*callback)(start, end, arg) < 0) - return; - prev = curr; - } } } + if (is_kern_available_memory(newnode)) + efi_total_mem += (newnode->num_pages << EFI_PAGE_SHIFT); + + /* Link this node in the list */ + if (prevnode != NULL) { + newnode->prev = prevnode; + prevnode->next = newnode; + } + prevnode = newnode; } - if (prev_valid) { - start = PAGE_ALIGN(prev.start); - end = prev.end & PAGE_MASK; - if (end > start) - (*callback)(start, end, arg); + /* print EFI memory map: */ + { + efi_memory_desc_t *md; + void *p; + int i; + + printk("KHALID: EFI mem map after creation of kern mem map:\n"); + for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { + md = p; + printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", + i, md->type, 
md->attribute, md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + md->num_pages >> (20 - EFI_PAGE_SHIFT)); + } } } @@ -479,6 +587,9 @@ * Cannot write to CRx with PSR.ic=1 */ psr = ia64_clear_ic(); +#ifdef CONFIG_KEXEC + ia64_ptr(0x01, vaddr & mask, IA64_GRANULE_SHIFT); +#endif ia64_itr(0x1, IA64_TR_PALCODE, vaddr & mask, pte_val(pfn_pte(md->phys_addr >> PAGE_SHIFT, PAGE_KERNEL)), IA64_GRANULE_SHIFT); @@ -508,16 +619,24 @@ for (cp = saved_command_line; *cp; ) { if (memcmp(cp, "mem=", 4) == 0) { cp += 4; - mem_limit = memparse(cp, &end); + mem_limit = memparse(cp, &end) - 2; if (end != cp) break; cp = end; } else if (memcmp(cp, "max_addr=", 9) == 0) { cp += 9; - max_addr = GRANULEROUNDDOWN(memparse(cp, &end)); + max_addr = memparse(cp, &end) - 1; if (end != cp) break; cp = end; +#ifdef CONFIG_KEXEC + } else if (memcmp(cp, "kexec_reboot", 12) == 0) { + cp += 12; + kexec_reboot = 1; + if (end != cp) + break; + cp = end; +#endif } else { while (*cp != ' ' && *cp) ++cp; @@ -662,10 +781,17 @@ } } - status = efi_call_phys(__va(runtime->set_virtual_address_map), +#ifdef CONFIG_KEXEC + if (kexec_reboot == 0) + status = efi_call_phys(__va(runtime->set_virtual_address_map), ia64_boot_param->efi_memmap_size, efi_desc_size, ia64_boot_param->efi_memdesc_version, ia64_boot_param->efi_memmap); + else { + printk(KERN_INFO "kexec'd kernel: Not virtualizing EFI\n"); + status = EFI_SUCCESS; + } +#endif if (status != EFI_SUCCESS) { printk(KERN_WARNING "warning: unable to switch EFI into virtual mode " "(status=%lu)\n", status); ===== arch/ia64/kernel/entry.S 1.69 vs edited ===== --- 1.69/arch/ia64/kernel/entry.S 2004-11-23 12:18:30 -08:00 +++ edited/arch/ia64/kernel/entry.S 2004-12-15 18:19:27 -08:00 @@ -1525,7 +1525,7 @@ data8 sys_mq_timedreceive // 1265 data8 sys_mq_notify data8 sys_mq_getsetattr - data8 sys_ni_syscall // reserved for kexec_load + data8 sys_kexec_load data8 sys_ni_syscall // reserved for vserver data8 sys_waitid // 1270 data8 sys_add_key ===== 
arch/ia64/kernel/setup.c 1.81 vs edited ===== --- 1.81/arch/ia64/kernel/setup.c 2004-11-18 23:03:10 -08:00 +++ edited/arch/ia64/kernel/setup.c 2004-12-15 18:17:14 -08:00 @@ -15,6 +15,7 @@ * 02/01/00 R.Seth fixed get_cpuinfo for SMP * 01/07/99 S.Eranian added the support for command line argument * 06/24/99 W.Drummond added boot_cpu_data. + * 12/14/05 K. Aziz Added code to find space to store new EFI memory map */ #include #include @@ -74,6 +75,9 @@ struct io_space io_space[MAX_IO_SPACES]; EXPORT_SYMBOL(io_space); unsigned int num_io_spaces; +extern u64 memdesc_area; +extern u64 memdesc_end; + unsigned char aux_device_present = 0xaa; /* XXX remove this when legacy I/O is gone */ @@ -156,6 +160,109 @@ } } +static int +is_available_memory (efi_memory_desc_t *md) +{ + if (!(md->attribute & EFI_MEMORY_WB)) + return 0; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + return 1; + } + return 0; +} + +#define MEM_DESC_SAFETY_MARGIN 8 + +static int +find_memmap_space(int *marker) +{ + void *efi_map_start, *efi_map_end, *p, *q; + u64 efi_desc_size, space_needed, granule_addr, nxt_granule_addr; + u64 smallest_block = UINT_MAX; + u64 small_block_addr, block_size; + efi_memory_desc_t *md, *check_md; + + /* + * Look for the first big enough chunk of memory to hold EFI memory + * map, preferably not granule aligned since we will end up throwing + * non-aligned part anyway. So we might as well make use of it. + */ + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + /* + * We will allocate enough memory to hold as many nodes as + * there are in EFI memory map. 
We will then allocate another + * MEM_DESC_SAFETY_MARGIN for safety margin in case we have to + * split some of the memory ranges and thus create new nodes + */ + space_needed = sizeof(kern_memdesc_t)*(ia64_boot_param->efi_memmap_size/efi_desc_size + MEM_DESC_SAFETY_MARGIN); + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + /* skip over non-WB and non-available memory descriptors */ + if (!is_available_memory(md)) + continue; + block_size = md->num_pages << EFI_PAGE_SHIFT; + + /* Look for any contiguous blocks of memory */ + for (q = p; q < efi_map_end; q += efi_desc_size) { + check_md = q; + + if ((check_md->attribute & EFI_MEMORY_WB) && + (check_md->phys_addr == md->phys_addr)) { + block_size += check_md->num_pages << EFI_PAGE_SHIFT; + p += efi_desc_size; + } + else + break; + } + + if ((block_size < smallest_block) && + (block_size >= space_needed)) { + smallest_block = block_size; + small_block_addr = md->phys_addr; + } + +#if 0 + granule_addr = md->phys_addr & ~(IA64_GRANULE_SIZE - 1); + if (granule_addr < md->phys_addr) { + /* We found a granule hole. Is it big enough? */ + nxt_granule_addr = granule_addr + IA64_GRANULE_SIZE; + if ((nxt_granule_addr - md->phys_addr) >= space_needed) { + rsvd_region[*marker].start = memdesc_area = + (unsigned long) __va(md->phys_addr); + rsvd_region[*marker].end = memdesc_end = + (unsigned long) __va(md->phys_addr) + space_needed; + (*marker)++; + return 0; + } + } +#endif + } + + /* + * Oops, we did not find a non granule aligned chunk of memory. 
+ * We will simply allocate a chunk of memory from one of the smaller + * blocks of memory + */ + rsvd_region[*marker].start = small_block_addr; + rsvd_region[*marker].end = memdesc_end = + small_block_addr + space_needed; + memdesc_area = __va(small_block_addr); + memdesc_end = memdesc_area + space_needed; + (*marker)++; + return 0; +} + /** * reserve_memory - setup reserved memory areas * @@ -195,6 +302,11 @@ n++; } #endif + + if (find_memmap_space(&n) != 0) { + printk(KERN_EMERG "Failed to find space to build kernel EFI memory map"); + machine_restart(NULL); + } /* end of memory marker */ rsvd_region[n].start = ~0UL; ===== arch/ia64/mm/contig.c 1.9 vs edited ===== --- 1.9/arch/ia64/mm/contig.c 2004-10-05 11:19:51 -07:00 +++ edited/arch/ia64/mm/contig.c 2004-12-15 18:17:14 -08:00 @@ -146,6 +146,9 @@ reserve_memory(); + efi_gather_memory(); + efi_trim_memory(); + /* first find highest page frame number */ max_pfn = 0; efi_memmap_walk(find_max_pfn, &max_pfn); ===== arch/ia64/mm/discontig.c 1.24 vs edited ===== --- 1.24/arch/ia64/mm/discontig.c 2004-10-26 12:23:47 -07:00 +++ edited/arch/ia64/mm/discontig.c 2004-12-22 09:58:44 -08:00 @@ -429,6 +429,9 @@ reserve_memory(); + efi_gather_memory(); + efi_trim_memory(); + if (numnodes == 0) { printk(KERN_ERR "node info missing!\n"); numnodes = 1; @@ -456,6 +459,9 @@ pernode = mem_data[node].pernode_addr; pernodesize = mem_data[node].pernode_size; map = pernode + pernodesize; + + printk("node %d start 0x%016lx, end 0x%016lx\n", node, + bdp->node_boot_start, bdp->node_low_pfn << PAGE_SHIFT); /* Sanity check... 
*/ if (!pernode) ===== arch/ia64/mm/init.c 1.72 vs edited ===== --- 1.72/arch/ia64/mm/init.c 2004-11-24 22:42:43 -08:00 +++ edited/arch/ia64/mm/init.c 2004-12-22 10:51:54 -08:00 @@ -551,14 +551,20 @@ #endif high_memory = __va(max_low_pfn * PAGE_SIZE); + printk("max_low_pfn = 0x%016lx\n", max_low_pfn); kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE); kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); kclist_add(&kcore_kernel, _stext, _end - _stext); - for_each_pgdat(pgdat) + printk("freeing pgdat memory\n"); + + for_each_pgdat(pgdat) { + printk("freeing node %d\n", pgdat->node_id); totalram_pages += free_all_bootmem_node(pgdat); + } + printk("counting reserved pages\n"); reserved_pages = 0; efi_memmap_walk(count_reserved_pages, &reserved_pages); ===== arch/ia64/sn/kernel/setup.c 1.43 vs edited ===== --- 1.43/arch/ia64/sn/kernel/setup.c 2004-11-02 12:32:14 -08:00 +++ edited/arch/ia64/sn/kernel/setup.c 2004-12-22 10:34:14 -08:00 @@ -516,6 +516,7 @@ continue; BUG(); /* All nodes must have klconfig tables! 
*/ } + klgraph_header = (u64)__va(klgraph_header); cnodeid = nasid_to_cnodeid(nasid); root_lboard[cnodeid] = (lboard_t *) NODE_OFFSET_TO_LBOARD((nasid), ===== include/asm-ia64/meminit.h 1.3 vs edited ===== --- 1.3/include/asm-ia64/meminit.h 2003-10-10 16:17:05 -07:00 +++ edited/include/asm-ia64/meminit.h 2004-12-15 18:17:14 -08:00 @@ -29,6 +29,14 @@ extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; extern int num_rsvd_regions; +typedef struct kern_memdesc { + u32 type; + u32 trimmed; + u64 start; + u64 num_pages; + struct kern_memdesc *next, *prev; +} kern_memdesc_t; + extern void find_memory (void); extern void reserve_memory (void); extern void find_initrd (void); ===== include/asm-ia64/mmu_context.h 1.20 vs edited ===== --- 1.20/include/asm-ia64/mmu_context.h 2004-10-05 11:24:12 -07:00 +++ edited/include/asm-ia64/mmu_context.h 2004-12-15 18:18:37 -08:00 @@ -166,5 +166,7 @@ #define switch_mm(prev_mm,next_mm,next_task) activate_mm(prev_mm, next_mm) +extern void use_mm(struct mm_struct *mm); + # endif /* ! __ASSEMBLY__ */ #endif /* _ASM_IA64_MMU_CONTEXT_H */ ===== include/asm-ia64/sn/klconfig.h 1.1 vs edited ===== --- 1.1/include/asm-ia64/sn/klconfig.h 2004-10-11 13:03:57 -07:00 +++ edited/include/asm-ia64/sn/klconfig.h 2004-12-22 10:10:53 -08:00 @@ -29,7 +29,7 @@ } kl_config_hdr_t; -#define NODE_OFFSET_TO_LBOARD(nasid,off) (lboard_t*)(NODE_CAC_BASE(nasid) + (off)) +#define NODE_OFFSET_TO_LBOARD(nasid,off) (lboard_t*)(NODE_CAC_BASE(nasid) | (off)) /* * The KLCONFIG area is organized as a LINKED LIST of BOARDs. A BOARD ===== kernel/sys.c 1.100 vs edited ===== --- 1.100/kernel/sys.c 2004-12-15 18:13:01 -08:00 +++ edited/kernel/sys.c 2004-12-15 18:18:38 -08:00 @@ -444,7 +444,7 @@ return -EINVAL; } notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - system_state = SYSTEM_BOOTING; + system_state = SYSTEM_RESTART; device_shutdown(); printk(KERN_EMERG "Starting new kernel\n"); machine_shutdown();