diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 8657c73..fe517dc 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -609,6 +609,77 @@ static inline void disable_tsc(struct ta
 	}
 }
 
+static void save_tx(void)
+{
+	unsigned long old_txcr, old_txsr;
+
+	/*
+	 * Save into current->thread.tx*
+	 */
+	asm(".byte 0x0f, 0x27, 0x00\n\t"	/* .byte 0xf2700 # mov txcr, eax */
+	    "mov %%eax,%0\n\t" : "=r"(old_txcr) : : "eax");
+	asm(".byte 0x0f, 0x27, 0x01\n\t"	/* .byte 0xf2701 # mov txsr, eax */
+	    "mov %%eax,%0\n\t" : "=r"(old_txsr) : : "eax");
+	current->thread.txcr = old_txcr;
+	current->thread.txsr = old_txsr;
+}
+
+static void restore_tx(struct task_struct *next)
+{
+	unsigned long new_txcr, new_txsr;
+
+	new_txcr = next->thread.txcr;
+	new_txsr = next->thread.txsr;
+
+	/*
+	 * Load new values from next->thread.tx*
+	 */
+	asm("mov %0,%%eax\n\t"
+	    ".byte 0x0f, 0x25, 0x00\n\t"	/* .byte 0xf2500 # mov eax, txcr */
+	    : : "r"(new_txcr) : "eax");
+	asm("mov %0,%%eax\n\t"
+	    ".byte 0x0f, 0x25, 0x01\n\t"	/* .byte 0xf2501 # mov eax, txsr */
+	    : : "r"(new_txsr) : "eax");
+}
+
+/**
+ * tx_arch_switch - clear TX state for the last task if necessary
+ * @next: task we're switching to
+ *
+ * If the last task was in the middle of a transaction (unbounded or not),
+ * we have to flush the cache of any existing TX state since it logically
+ * belongs to the last task.
+ *
+ * Notes:
+ *  o is instruction argument order correct (AT&T vs. Intel)?
+ *  o txabort of ring 0 vs. ring 3?
+ *  o if handler is active for TX_LACK_* we don't need to return to the
+ *    handler on resume
+ *  o need to do this at interrupt time rather than context switch time
+ *    for generality (per requirements document section 4)
+ */
+void tx_arch_switch(struct task_struct *next)
+{
+	unsigned long txcr;
+
+	asm(".byte 0x0f, 0x27, 0x00\n\t"	/* .byte 0xf2700 # mov txcr, eax */
+	    "mov %%eax,%0\n\t" : "=r"(txcr) : : "eax");
+
+	save_tx();
+	/*
+	 * If a transaction was in progress, we need to set the appropriate
+	 * TXSR bits and jump to the thread's event handler on return.
+	 */
+	if (txcr)
+		current->thread.txsr |= TX_LOSS_READ_MONITOR |
+					TX_LOSS_WRITE_MONITOR;
+
+	/* txabort should clear txsr and zero bits 21:22 of txcr */
+	asm(".byte 0x0f, 0x3a" ::: "memory");	/* .byte 0xf3a # txabort */
+
+	restore_tx(next);
+}
+
 /*
  * switch_to(x,yn) should switch tasks from x to y.
  *
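To make the interface above concrete, here is how a user thread might drive a transaction against these (hypothetical) registers. This sketch is not part of the patch: read_txsr() reuses the 0x0f,0x27 encoding from save_tx(), but the retry loop and the tx_critical_section() callback are purely illustrative assumptions.

/*
 * Illustrative only (not part of the patch): poll TXSR with the same
 * encoding save_tx() uses, and retry if a context switch cost us our
 * read/write monitors mid-transaction.
 */
static inline unsigned long read_txsr(void)
{
	unsigned long txsr;

	asm(".byte 0x0f, 0x27, 0x01\n\t"	/* .byte 0xf2701 # mov txsr, eax */
	    "mov %%eax,%0\n\t" : "=r"(txsr) : : "eax");
	return txsr;
}

void run_transaction(void (*tx_critical_section)(void))
{
	do {
		tx_critical_section();	/* assumed to txbegin/txcommit internally */
	} while (read_txsr() & (TX_LOSS_READ_MONITOR |
				TX_LOSS_WRITE_MONITOR));
}

This is exactly the case tx_arch_switch() manufactures: by setting TX_LOSS_* in the saved txsr of a preempted transaction, the kernel makes the eventual resume look like a monitor loss, which the retry loop above would absorb.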
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index bb6745d..ea22638 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -493,6 +493,75 @@ out:
 	return err;
 }
 
+static void save_tx(void)
+{
+	unsigned long old_txcr, old_txsr;
+
+	/*
+	 * Save into current->thread.tx*
+	 */
+	asm(".byte 0x0f, 0x27, 0x00\n\t"	/* .byte 0xf2700 # mov txcr, eax */
+	    "mov %%eax,%0\n\t" : "=r"(old_txcr) : : "eax");
+	asm(".byte 0x0f, 0x27, 0x01\n\t"	/* .byte 0xf2701 # mov txsr, eax */
+	    "mov %%eax,%0\n\t" : "=r"(old_txsr) : : "eax");
+	current->thread.txcr = old_txcr;
+	current->thread.txsr = old_txsr;
+}
+
+static void restore_tx(struct task_struct *next)
+{
+	unsigned long new_txcr, new_txsr;
+
+	new_txcr = next->thread.txcr;
+	new_txsr = next->thread.txsr;
+
+	/*
+	 * Load new values from next->thread.tx*
+	 */
+	asm("mov %0,%%eax\n\t"
+	    ".byte 0x0f, 0x25, 0x00\n\t"	/* .byte 0xf2500 # mov eax, txcr */
+	    : : "r"(new_txcr) : "eax");
+	asm("mov %0,%%eax\n\t"
+	    ".byte 0x0f, 0x25, 0x01\n\t"	/* .byte 0xf2501 # mov eax, txsr */
+	    : : "r"(new_txsr) : "eax");
+}
+
+/**
+ * tx_arch_switch - clear TX state for the last task if necessary
+ * @next: task we're switching to
+ *
+ * If the last task was in the middle of a transaction (unbounded or not),
+ * we have to flush the cache of any existing TX state since it logically
+ * belongs to the last task.
+ *
+ * Notes:
+ *  o is instruction argument order correct (AT&T vs. Intel)?
+ *  o txabort of ring 0 vs. ring 3?
+ *  o if handler is active for TX_LACK_* we don't need to return to the
+ *    handler on resume
+ */
+void tx_arch_switch(struct task_struct *next)
+{
+	unsigned long txcr;
+
+	asm(".byte 0x0f, 0x27, 0x00\n\t"	/* .byte 0xf2700 # mov txcr, eax */
+	    "mov %%eax,%0\n\t" : "=r"(txcr) : : "eax");
+
+	save_tx();
+	/*
+	 * If a transaction was in progress, we need to set the appropriate
+	 * TXSR bits and jump to the thread's event handler on return.
+	 */
+	if (txcr)
+		current->thread.txsr |= TX_LOSS_READ_MONITOR |
+					TX_LOSS_WRITE_MONITOR;
+
+	/* txabort should clear txsr and zero bits 21:22 of txcr */
+	asm(".byte 0x0f, 0x3a" ::: "memory");	/* .byte 0xf3a # txabort */
+
+	restore_tx(next);
+}
+
 /*
  * This special macro can be used to load a debugging register
  */
@@ -598,7 +667,6 @@ __switch_to(struct task_struct *prev_p,
 		loaddebug(next, 7);
 	}
 
-
 	/*
 	 * Handle the IO bitmap
 	 */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index b32346d..09b7573 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -144,6 +144,23 @@ #define X86_EFLAGS_VIP 0x00100000 /* Vir
 #define X86_EFLAGS_ID	0x00200000 /* CPUID detection flag */
 
 /*
+ * Transaction status register bits
+ */
+#define TX_LOSS_READ_MONITOR		(1 << 0)
+#define TX_LOSS_WRITE_MONITOR		(1 << 1)
+#define TX_LOSS_BUFFERED_DATA		(1 << 2)
+#define TX_LACK_READ_MONITOR		(1 << 3)
+#define TX_LACK_WRITE_MONITOR		(1 << 4)
+#define TX_EXIT_TO_OS			(1 << 5)
+#define TX_EXIT_TO_VMM			(1 << 6)
+#define TX_FORBIDDEN_INSTRUCTION	(1 << 7)
+#define TX_ILLEGAL_MEM_TYPE		(1 << 8)
+#define TX_RING_3_ENTRY			(1 << 9)
+#define TX_TX_PARAMETER_CHANGED		(1 << 10)
+#define TX_FP_MMX_ACCESS		(1 << 11)
+#define TX_XMM_ACCESS			(1 << 12)
+
+/*
  * Generic CPUID function
  * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
  * resulting in stale register contents being returned.
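The TXSR bits above split into transient causes (TX_LOSS_*, TX_EXIT_TO_*), where simply re-running the transaction can succeed, and hard causes (TX_FORBIDDEN_INSTRUCTION, TX_ILLEGAL_MEM_TYPE, and friends), where retrying is pointless. A handler installed at thread.txhandleraddr would presumably dispatch on them along the lines below; the calling convention and the retry/fallback return codes are assumptions, since the patch only stores the handler address.

/* Assumed handler shape -- the patch defines only txhandleraddr. */
enum tx_action { TX_ACTION_RETRY, TX_ACTION_FALLBACK };

#define TX_TRANSIENT	(TX_LOSS_READ_MONITOR | TX_LOSS_WRITE_MONITOR | \
			 TX_LOSS_BUFFERED_DATA | TX_EXIT_TO_OS | TX_EXIT_TO_VMM)

enum tx_action tx_event_handler(unsigned long txsr)
{
	if (txsr & TX_TRANSIENT)
		return TX_ACTION_RETRY;		/* restart from txbeginip */
	return TX_ACTION_FALLBACK;		/* e.g. take a lock instead */
}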
@@ -461,6 +478,15 @@ struct thread_struct {
 	unsigned long	cr2, trap_no, error_code;
 /* floating point info */
 	union i387_union	i387;
+/* tx info */
+	unsigned long	txcr;
+	unsigned long	txsr;
+	unsigned long	txhandleraddr;
+	unsigned long	txbeginip;
+	unsigned long	txfaultaddr;
+	unsigned long	txiret;
+	unsigned long	txsourceaddr;
+	unsigned long	txdestaddr;
 /* virtual 86 mode info */
 	struct vm86_struct __user * vm86_info;
 	unsigned long		screen_bitmap;
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index 49928eb..2514f14 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -11,6 +11,13 @@ #ifdef __KERNEL__
 struct task_struct;	/* one of the stranger aspects of C forward declarations.. */
 extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
 
+void tx_arch_switch(struct task_struct *next);
+#define prepare_arch_switch(next) \
+do { \
+	tx_arch_switch(next); \
+} while (0)
+
+
 #define switch_to(prev,next,last) do {					\
 	unsigned long esi,edi;						\
 	asm volatile("pushl %%ebp\n\t"					\
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 3b3c121..cd93562 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -139,6 +139,23 @@ #define X86_CR4_OSFXSR 0x0200 /* enable
 #define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
 
 /*
+ * Transaction status register bits
+ */
+#define TX_LOSS_READ_MONITOR		(1 << 0)
+#define TX_LOSS_WRITE_MONITOR		(1 << 1)
+#define TX_LOSS_BUFFERED_DATA		(1 << 2)
+#define TX_LACK_READ_MONITOR		(1 << 3)
+#define TX_LACK_WRITE_MONITOR		(1 << 4)
+#define TX_EXIT_TO_OS			(1 << 5)
+#define TX_EXIT_TO_VMM			(1 << 6)
+#define TX_FORBIDDEN_INSTRUCTION	(1 << 7)
+#define TX_ILLEGAL_MEM_TYPE		(1 << 8)
+#define TX_RING_3_ENTRY			(1 << 9)
+#define TX_TX_PARAMETER_CHANGED		(1 << 10)
+#define TX_FP_MMX_ACCESS		(1 << 11)
+#define TX_XMM_ACCESS			(1 << 12)
+
+/*
  * Save the cr4 feature set we're using (ie
  * Pentium 4MB enable and PPro Global page
  * enable), so that any CPU's that boot up
@@ -261,6 +278,15 @@ struct thread_struct {
 	unsigned long	cr2, trap_no, error_code;
 /* floating point info */
 	union i387_union	i387  __attribute__((aligned(16)));
+/* tx info */
+	unsigned long	txcr;
+	unsigned long	txsr;
+	unsigned long	txhandleraddr;
+	unsigned long	txbeginip;
+	unsigned long	txfaultaddr;
+	unsigned long	txiret;
+	unsigned long	txsourceaddr;
+	unsigned long	txdestaddr;
 /* IO permissions. the bitmap could be moved into the GDT, that would make
    switch faster for a limited number of ioperm using tasks. -AK */
 	int		ioperm;
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 6bf170b..1c79f09 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -20,6 +20,12 @@ #define RESTORE_CONTEXT "movq %%rbp,%%rs
 #define __EXTRA_CLOBBER \
 	,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
 
+void tx_arch_switch(struct task_struct *next);
+#define prepare_arch_switch(next) \
+do { \
+	tx_arch_switch(next); \
+} while (0)
+
 #define switch_to(prev,next,last) \
 	asm volatile(SAVE_CONTEXT					    \
 		     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
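Both system.h hunks wire the TX flush in by defining prepare_arch_switch(), a hook the 2.6 scheduler already invokes (as a no-op by default) before switch_to() runs, so tx_arch_switch() executes while current still points at the outgoing task. Roughly, the call site in kernel/sched.c of that era looks like this (abridged, quoted from memory):

static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
{
	prepare_lock_switch(rq, next);
	prepare_arch_switch(next);	/* -> tx_arch_switch(next) here */
}

Overriding the existing hook keeps both switch_to() implementations untouched, at the cost of running the save/abort/restore sequence on every context switch; the i386 notes already flag that the flush ultimately belongs at interrupt time instead.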