From: Davide Libenzi The following patch implements a lazy TSS's I/O bitmap copy for the x86-64 architecture. Instead of copying the bitmap at every context switch, the TSS's I/O bitmap offset is set to an invalid offset, so that an attempt to access the bitmap from the CPU will trigger a GP fault. The bitmap is updated lazily at that stage, thereby avoiding bitmap copies in cases where the switched task does not perform any I/O operation. Signed-off-by: Davide Libenzi Signed-off-by: Andrew Morton --- 25-power4-akpm/arch/x86_64/kernel/ioport.c | 13 +++++----- 25-power4-akpm/arch/x86_64/kernel/process.c | 33 +++++++------------------- 25-power4-akpm/arch/x86_64/kernel/traps.c | 21 ++++++++++++++++ 25-power4-akpm/include/asm-x86_64/processor.h | 4 ++- 4 files changed, 41 insertions(+), 30 deletions(-) diff -puN arch/x86_64/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 arch/x86_64/kernel/ioport.c --- 25-power4/arch/x86_64/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 2004-09-07 22:47:48.018555672 -0700 +++ 25-power4-akpm/arch/x86_64/kernel/ioport.c 2004-09-07 22:47:48.027554304 -0700 @@ -62,12 +62,13 @@ asmlinkage long sys_ioperm(unsigned long */ set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); tss = init_tss + get_cpu(); - if (tss->io_bitmap_base == IO_BITMAP_OFFSET) { /* already active? */ - set_bitmap(tss->io_bitmap, from, num, !turn_on); - } else { - memcpy(tss->io_bitmap, t->io_bitmap_ptr, IO_BITMAP_BYTES); - tss->io_bitmap_base = IO_BITMAP_OFFSET; /* Activate it in the TSS */ - } + + /* + * Sets the lazy trigger so that the next I/O operation will + * reload the correct bitmap. 
+ */ + tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; + put_cpu(); return 0; } diff -puN arch/x86_64/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 arch/x86_64/kernel/process.c --- 25-power4/arch/x86_64/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 2004-09-07 22:47:48.020555368 -0700 +++ 25-power4-akpm/arch/x86_64/kernel/process.c 2004-09-07 22:47:48.028554152 -0700 @@ -485,29 +485,16 @@ struct task_struct *__switch_to(struct t loaddebug(next, 7); } - - /* - * Handle the IO bitmap - */ - if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { - if (next->io_bitmap_ptr) { - /* - * 2 cachelines copy ... not good, but not that - * bad either. Anyone got something better? - * This only affects processes which use ioperm(). - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, IO_BITMAP_BYTES); - tss->io_bitmap_base = IO_BITMAP_OFFSET; - } else { - /* - * a bitmap offset pointing outside of the TSS limit - * causes a nicely controllable SIGSEGV if a process - * tries to use a port IO instruction. The first - * sys_ioperm() call sets up the bitmap properly. - */ - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - } - } + /* + * Lazy TSS's I/O bitmap copy. We set an invalid offset here and + * we let the task get a GPF in case an I/O instruction is performed. + * The handler of the GPF will verify that the faulting task has a valid + * I/O bitmap and, if true, does the real copy and restarts the instruction. + * This will save us from redundant copies when the currently switched task + * does not perform any I/O during its timeslice. + */ + tss->io_bitmap_base = next->io_bitmap_ptr ? 
INVALID_IO_BITMAP_OFFSET_LAZY: + INVALID_IO_BITMAP_OFFSET; return prev_p; } diff -puN arch/x86_64/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 arch/x86_64/kernel/traps.c --- 25-power4/arch/x86_64/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 2004-09-07 22:47:48.021555216 -0700 +++ 25-power4-akpm/arch/x86_64/kernel/traps.c 2004-09-07 22:47:48.029554000 -0700 @@ -488,6 +488,27 @@ DO_ERROR_STACK( 8, SIGSEGV, "double faul asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) { + int cpu = get_cpu(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct thread_struct *tsk_th = &current->thread; + + /* + * Perform the lazy TSS's I/O bitmap copy. If the TSS has an + * invalid offset set (the LAZY one) and the faulting thread has + * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS + * and we set the offset field correctly. Then we let the CPU + * restart the faulting instruction. + */ + if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && + tsk_th->io_bitmap_ptr) { + memcpy(tss->io_bitmap, tsk_th->io_bitmap_ptr, + IO_BITMAP_BYTES); + tss->io_bitmap_base = IO_BITMAP_OFFSET; + put_cpu(); + return; + } + put_cpu(); + conditional_sti(regs); #ifdef CONFIG_CHECKING diff -puN include/asm-x86_64/processor.h~lazy-tsss-i-o-bitmap-copy-for-x86-64 include/asm-x86_64/processor.h --- 25-power4/include/asm-x86_64/processor.h~lazy-tsss-i-o-bitmap-copy-for-x86-64 2004-09-07 22:47:48.023554912 -0700 +++ 25-power4-akpm/include/asm-x86_64/processor.h 2004-09-07 22:47:48.030553848 -0700 @@ -187,6 +187,7 @@ static inline void clear_in_cr4 (unsigne #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 +#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 struct i387_fxsave_struct { u16 cwd; @@ -217,13 +218,14 @@ struct tss_struct { u32 reserved4; u16 reserved5; u16 io_bitmap_base; + /* * The extra 1 is there because the CPU will access an * 
additional byte beyond the end of the IO permission * bitmap. The extra byte must be all 1 bits, and must * be within the limit. Thus we have: * - * 128 bytes, the bitmap itself, for ports 0..0x3ff + * 8192 bytes, the bitmap itself, for ports 0..65535 * 8 bytes, for an extra "long" of ~0UL */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; _