From: Paul Mackerras This lays the groundwork for using __thread for per-cpu variables. The toolchain will put initialized __thread variables into a .tdata section, and uninitialized __thread variables into a .tbss section. Thus the module loader needs to cope with two per-cpu sections, of which one is initialized and the other must be zeroed. With this patch, the module loader looks for .tdata and .tbss, and if it finds neither, falls back to the .data.percpu section that we currently use. This patch extends the various percpu_modcopy implementations to take two size parameters: the first is the amount to initialize and the second is the amount to zero following that. At the moment all percpu_modcopy implementations simply BUG_ON() if the amount to zero is nonzero; none of them performs the zeroing yet. I have a follow-up patch that makes 64-bit powerpc use __thread for per-cpu variables. Signed-off-by: Paul Mackerras Cc: Rusty Russell Signed-off-by: Andrew Morton --- arch/ia64/kernel/module.c | 4 + include/asm-ia64/percpu.h | 3 - include/asm-powerpc/percpu.h | 3 - include/asm-s390/percpu.h | 3 - include/asm-sparc64/percpu.h | 3 - include/asm-x86_64/percpu.h | 3 - kernel/module.c | 89 ++++++++++++++++++++++++--------- 7 files changed, 78 insertions(+), 30 deletions(-) diff -puN arch/ia64/kernel/module.c~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections arch/ia64/kernel/module.c --- devel/arch/ia64/kernel/module.c~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections 2006-05-11 15:19:05.000000000 -0700 +++ devel-akpm/arch/ia64/kernel/module.c 2006-05-11 15:19:05.000000000 -0700 @@ -944,9 +944,11 @@ module_arch_cleanup (struct module *mod) #ifdef CONFIG_SMP void -percpu_modcopy (void *pcpudst, const void *src, unsigned long size) +percpu_modcopy (void *pcpudst, const void *src, unsigned long size, + unsigned long zero_size) { unsigned int i; + BUG_ON(zero_size != 0); for_each_possible_cpu(i) { memcpy(pcpudst + __per_cpu_offset[i], src, size); } diff -puN 
include/asm-ia64/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections include/asm-ia64/percpu.h --- devel/include/asm-ia64/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections 2006-05-11 15:19:05.000000000 -0700 +++ devel-akpm/include/asm-ia64/percpu.h 2006-05-11 15:19:05.000000000 -0700 @@ -44,7 +44,8 @@ DECLARE_PER_CPU(unsigned long, local_per #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset))) #define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset))) -extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size); +extern void percpu_modcopy(void *pcpudst, const void *src, + unsigned long init_sz, unsigned long zero_sz); extern void setup_per_cpu_areas (void); extern void *per_cpu_init(void); diff -puN include/asm-powerpc/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections include/asm-powerpc/percpu.h --- devel/include/asm-powerpc/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections 2006-05-11 15:19:05.000000000 -0700 +++ devel-akpm/include/asm-powerpc/percpu.h 2006-05-11 15:19:05.000000000 -0700 @@ -25,9 +25,10 @@ #define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) /* A macro to avoid #include hell... 
*/ -#define percpu_modcopy(pcpudst, src, size) \ +#define percpu_modcopy(pcpudst, src, size, zero_size) \ do { \ unsigned int __i; \ + BUG_ON(zero_size != 0); \ for_each_possible_cpu(__i) \ memcpy((pcpudst)+__per_cpu_offset(__i), \ (src), (size)); \ diff -puN include/asm-s390/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections include/asm-s390/percpu.h --- devel/include/asm-s390/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections 2006-05-11 15:19:05.000000000 -0700 +++ devel-akpm/include/asm-s390/percpu.h 2006-05-11 15:19:05.000000000 -0700 @@ -44,9 +44,10 @@ extern unsigned long __per_cpu_offset[NR #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) /* A macro to avoid #include hell... */ -#define percpu_modcopy(pcpudst, src, size) \ +#define percpu_modcopy(pcpudst, src, size, zero_size) \ do { \ unsigned int __i; \ + BUG_ON(zero_size != 0); \ for_each_possible_cpu(__i) \ memcpy((pcpudst)+__per_cpu_offset[__i], \ (src), (size)); \ diff -puN include/asm-sparc64/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections include/asm-sparc64/percpu.h --- devel/include/asm-sparc64/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections 2006-05-11 15:19:05.000000000 -0700 +++ devel-akpm/include/asm-sparc64/percpu.h 2006-05-11 15:19:05.000000000 -0700 @@ -24,9 +24,10 @@ register unsigned long __local_per_cpu_o #define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset)) /* A macro to avoid #include hell... 
*/ -#define percpu_modcopy(pcpudst, src, size) \ +#define percpu_modcopy(pcpudst, src, size, zero_size) \ do { \ unsigned int __i; \ + BUG_ON(zero_size != 0); \ for_each_possible_cpu(__i) \ memcpy((pcpudst)+__per_cpu_offset(__i), \ (src), (size)); \ diff -puN include/asm-x86_64/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections include/asm-x86_64/percpu.h --- devel/include/asm-x86_64/percpu.h~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections 2006-05-11 15:19:05.000000000 -0700 +++ devel-akpm/include/asm-x86_64/percpu.h 2006-05-11 15:19:05.000000000 -0700 @@ -24,9 +24,10 @@ #define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) /* A macro to avoid #include hell... */ -#define percpu_modcopy(pcpudst, src, size) \ +#define percpu_modcopy(pcpudst, src, size, zero_size) \ do { \ unsigned int __i; \ + BUG_ON(zero_size != 0); \ for_each_possible_cpu(__i) \ memcpy((pcpudst)+__per_cpu_offset(__i), \ (src), (size)); \ diff -puN kernel/module.c~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections kernel/module.c --- devel/kernel/module.c~allow-for-per-cpu-data-being-in-tdata-and-tbss-sections 2006-05-11 15:19:05.000000000 -0700 +++ devel-akpm/kernel/module.c 2006-05-11 15:19:05.000000000 -0700 @@ -355,11 +355,28 @@ static void percpu_modfree(void *freeme) } } -static unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static void find_pcpusecs(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings, + unsigned int pcpuindex[2]) { - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); + /* + * Some architectures use __thread for per-cpu variables, + * and that generates .tdata and .tbss sections. 
+ */ + pcpuindex[0] = find_sec(hdr, sechdrs, secstrings, ".tdata"); + pcpuindex[1] = find_sec(hdr, sechdrs, secstrings, ".tbss"); + if (pcpuindex[0]) + return; + if (pcpuindex[1] && !pcpuindex[0]) { + /* move .tbss to pcpuindex[0], it makes things easier later */ + pcpuindex[0] = pcpuindex[1]; + pcpuindex[1] = 0; + return; + } + + /* look for the generic .data.percpu if no .tdata */ + pcpuindex[0] = find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } static int percpu_modinit(void) @@ -390,17 +407,20 @@ static inline void percpu_modfree(void * { BUG(); } -static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static inline void find_pcpusecs(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings, + unsigned int pcpuindex[2]) { - return 0; + pcpuindex[0] = 0; + pcpuindex[1] = 0; } static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) + unsigned long init_sz, unsigned long zero_sz) { - /* pcpusec should be 0, and size of that section should be 0. */ - BUG_ON(size != 0); + /* there should be no per-cpu data to copy or clear */ + BUG_ON(init_sz != 0); + BUG_ON(zero_sz != 0); } #endif /* CONFIG_SMP */ @@ -1122,7 +1142,7 @@ static int simplify_symbols(Elf_Shdr *se unsigned int symindex, const char *strtab, unsigned int versindex, - unsigned int pcpuindex, + unsigned int pcpuindex[2], struct module *mod) { Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; @@ -1166,9 +1186,13 @@ static int simplify_symbols(Elf_Shdr *se default: /* Divert to percpu allocation if a percpu var. 
*/ - if (sym[i].st_shndx == pcpuindex) + if (sym[i].st_shndx == pcpuindex[0]) secbase = (unsigned long)mod->percpu; - else + else if (sym[i].st_shndx == pcpuindex[1]) { + /* .tbss follows .tdata */ + secbase = (unsigned long)mod->percpu + + sechdrs[pcpuindex[0]].sh_size; + } else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; break; @@ -1402,8 +1426,9 @@ static struct module *load_module(void _ char *secstrings, *args, *modmagic, *strtab = NULL; unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, exportindex, modindex, obsparmindex, infoindex, gplindex, - crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, + crcindex, gplcrcindex, versindex, pcpuindex[2], gplfutureindex, gplfuturecrcindex; + unsigned int pcpusize = 0, pcpuinitsize = 0; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ @@ -1492,7 +1517,7 @@ static struct module *load_module(void _ obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); - pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); + find_pcpusecs(hdr, sechdrs, secstrings, pcpuindex); /* Don't keep modinfo section */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -1540,16 +1565,32 @@ static struct module *load_module(void _ if (err < 0) goto free_mod; - if (pcpuindex) { - /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); + if (pcpuindex[0]) { + /* We have a special allocation for these sections. 
*/ + unsigned int align = 0; + int i, j; + + i = pcpuindex[0]; + pcpusize = sechdrs[i].sh_size; + align = sechdrs[i].sh_addralign; + sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; + if (sechdrs[i].sh_type != SHT_NOBITS) + pcpuinitsize = pcpusize; + if (pcpuindex[1]) { + /* have both .tdata and .tbss */ + j = pcpuindex[1]; + pcpusize = ALIGN(pcpusize, sechdrs[j].sh_addralign); + sechdrs[i].sh_size = pcpusize; + if (sechdrs[j].sh_addralign > align) + align = sechdrs[j].sh_addralign; + pcpusize += sechdrs[j].sh_size; + sechdrs[j].sh_flags &= ~(unsigned long)SHF_ALLOC; + } + percpu = percpu_modalloc(pcpusize, align, mod->name); if (!percpu) { err = -ENOMEM; goto free_mod; } - sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; mod->percpu = percpu; } @@ -1678,8 +1719,8 @@ static struct module *load_module(void _ sort_extable(extable, extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, - sechdrs[pcpuindex].sh_size); + percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex[0]].sh_addr, + pcpuinitsize, pcpusize - pcpuinitsize); add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); _