x86-64 TIF flags for debug regs and io bitmap in ctxsw From: Stephane Eranian Hello, Following my discussion with Andi. Here is a patch that introduces two new TIF flags to simplify the context switch code in __switch_to(). The idea is to minimize the number of cache lines accessed in the common case, i.e., when neither the debug registers nor the I/O bitmap are used. This patch covers the x86-64 modifications. A patch for i386 follows. Changelog: - add TIF_DEBUG to track when debug registers are active - add TIF_IO_BITMAP to track when I/O bitmap is used - modify __switch_to() to use the new TIF flags : eranian@hpl.hp.com Signed-off-by: Andi Kleen --- arch/x86_64/ia32/ptrace32.c | 4 ++ arch/x86_64/kernel/ioport.c | 1 arch/x86_64/kernel/process.c | 73 ++++++++++++++++++++++----------------- arch/x86_64/kernel/ptrace.c | 8 +++- include/asm-x86_64/thread_info.h | 7 +++ 5 files changed, 60 insertions(+), 33 deletions(-) Index: linux/arch/x86_64/ia32/ptrace32.c =================================================================== --- linux.orig/arch/x86_64/ia32/ptrace32.c +++ linux/arch/x86_64/ia32/ptrace32.c @@ -117,6 +117,10 @@ static int putreg32(struct task_struct * if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1) return -EIO; child->thread.debugreg7 = val; + if (val) + set_tsk_thread_flag(child, TIF_DEBUG); + else + clear_tsk_thread_flag(child, TIF_DEBUG); break; default: Index: linux/arch/x86_64/kernel/ioport.c =================================================================== --- linux.orig/arch/x86_64/kernel/ioport.c +++ linux/arch/x86_64/kernel/ioport.c @@ -56,6 +56,7 @@ asmlinkage long sys_ioperm(unsigned long memset(bitmap, 0xff, IO_BITMAP_BYTES); t->io_bitmap_ptr = bitmap; + set_thread_flag(TIF_IO_BITMAP); } /* Index: linux/arch/x86_64/kernel/process.c =================================================================== --- linux.orig/arch/x86_64/kernel/process.c +++ linux/arch/x86_64/kernel/process.c @@ -350,6 +350,7 @@ void exit_thread(void) kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + clear_thread_flag(TIF_IO_BITMAP); /* * Careful, clear this in the TSS too: */ @@ -369,6 +370,7 @@ void flush_thread(void) if (t->flags & _TIF_IA32) current_thread_info()->status |= TS_COMPAT; } + t->flags &= ~_TIF_DEBUG; tsk->thread.debugreg0 = 0; tsk->thread.debugreg1 = 0; @@ -461,7 +463,7 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); - if (unlikely(me->thread.io_bitmap_ptr != NULL)) { + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; @@ -469,6 +471,7 @@ int copy_thread(int nr, unsigned long cl } memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); + set_tsk_thread_flag(p, TIF_IO_BITMAP); } /* @@ -498,6 +501,40 @@ out: */ #define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) +static inline void __switch_to_xtra(struct task_struct *prev_p, + struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + + prev = &prev_p->thread, + next = &next_p->thread; + + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { + loaddebug(next, 0); + loaddebug(next, 1); + loaddebug(next, 2); + loaddebug(next, 3); + /* no 4 and 5 */ + loaddebug(next, 6); + loaddebug(next, 7); + } + + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { + /* + * Copy the relevant range of the IO bitmap. + * Normally this is 128 bytes or less: + */ + memcpy(tss->io_bitmap, next->io_bitmap_ptr, + max(prev->io_bitmap_max, next->io_bitmap_max)); + } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { + /* + * Clear any possible leftover bits: + */ + memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); + } +} + /* * switch_to(x,y) should switch tasks from x to y. * @@ -586,37 +623,11 @@ __switch_to(struct task_struct *prev_p, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); /* - * Now maybe reload the debug registers + * Now maybe reload the debug registers and handle I/O bitmaps */ - if (unlikely(next->debugreg7)) { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); - /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); - } - - - /* - * Handle the IO bitmap - */ - if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { - if (next->io_bitmap_ptr) - /* - * Copy the relevant range of the IO bitmap. - * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); - else { - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); - } - } + if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) + || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) + __switch_to_xtra(prev_p, next_p, tss); return prev_p; } Index: linux/arch/x86_64/kernel/ptrace.c =================================================================== --- linux.orig/arch/x86_64/kernel/ptrace.c +++ linux/arch/x86_64/kernel/ptrace.c @@ -420,9 +420,13 @@ long arch_ptrace(struct task_struct *chi if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) break; if (i == 4) { - child->thread.debugreg7 = data; + child->thread.debugreg7 = data; + if (data) + set_tsk_thread_flag(child, TIF_DEBUG); + else + clear_tsk_thread_flag(child, TIF_DEBUG); ret = 0; - } + } break; } break; Index: linux/include/asm-x86_64/thread_info.h =================================================================== --- linux.orig/include/asm-x86_64/thread_info.h +++ linux/include/asm-x86_64/thread_info.h @@ -120,6 +120,8 @@ static inline struct thread_info *stack_ #define TIF_FORK 18 /* ret_from_fork */ #define TIF_ABI_PENDING 19 #define TIF_MEMDIE 20 +#define TIF_DEBUG 21 /* uses debug registers */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define _TIF_SYSCALL_TRACE (1<