From: Davide Libenzi The following patch implements a lazy copy of the TSS's I/O bitmap for the i386 architecture. Instead of copying the bitmap at every context switch, the TSS's I/O bitmap offset is set to an invalid offset, so that an attempt by the CPU to access the bitmap will trigger a GP fault. The bitmap is updated lazily at that stage, thereby avoiding bitmap copies in cases where the switched-in task does not perform any I/O operation. Signed-off-by: Davide Libenzi Signed-off-by: Andrew Morton --- 25-akpm/arch/i386/kernel/ioport.c | 7 +++++-- 25-akpm/arch/i386/kernel/process.c | 26 ++++++++++++-------------- 25-akpm/arch/i386/kernel/traps.c | 29 +++++++++++++++++++++++++++++ 25-akpm/include/asm-i386/processor.h | 7 ++++++- 4 files changed, 52 insertions(+), 17 deletions(-) diff -puN arch/i386/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-i386 arch/i386/kernel/ioport.c --- 25/arch/i386/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-i386 2004-09-03 22:54:46.582091752 -0700 +++ 25-akpm/arch/i386/kernel/ioport.c 2004-09-03 22:54:46.590090536 -0700 @@ -105,8 +105,11 @@ asmlinkage long sys_ioperm(unsigned long t->io_bitmap_max = bytes; - /* Update the TSS: */ - memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + /* + * Sets the lazy trigger so that the next I/O operation will + * reload the correct bitmap. + */ + tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; put_cpu(); diff -puN arch/i386/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-i386 arch/i386/kernel/process.c --- 25/arch/i386/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-i386 2004-09-03 22:54:46.583091600 -0700 +++ 25-akpm/arch/i386/kernel/process.c 2004-09-03 22:56:21.932596272 -0700 @@ -596,20 +596,18 @@ struct task_struct fastcall * __switch_t loaddebug(next, 7); } - if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { - if (next->io_bitmap_ptr) - /* - * Copy the relevant range of the IO bitmap. 
- * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); - else - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); - } + /* + * Lazy TSS's I/O bitmap copy. We set an invalid offset here and we let + * the task to get a GPF in case an I/O instruction is performed. + * The handler of the GPF will verify that the faulting task has a valid + * I/O bitmap and, if true, does the real copy and restart the + * instruction. This will save us for redoundant copies when the + * currently switched task does not perform any I/O during its + * timeslice. + */ + tss->io_bitmap_base = next->io_bitmap_ptr ? + INVALID_IO_BITMAP_OFFSET_LAZY : + INVALID_IO_BITMAP_OFFSET; perfctr_resume_thread(next); diff -puN arch/i386/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-i386 arch/i386/kernel/traps.c --- 25/arch/i386/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-i386 2004-09-03 22:54:46.585091296 -0700 +++ 25-akpm/arch/i386/kernel/traps.c 2004-09-03 22:54:46.593090080 -0700 @@ -524,6 +524,35 @@ DO_ERROR_INFO(17, SIGBUS, "alignment che asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) { + int cpu = get_cpu(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct thread_struct *tsk_th = &current->thread; + + /* + * Perform the lazy TSS's I/O bitmap copy. If the TSS has an + * invalid offset set (the LAZY one) and the faulting thread has + * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS + * and we set the offset field correctly. Then we let the CPU to + * restart the faulting instruction. + */ + if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && + tsk_th->io_bitmap_ptr) { + memcpy(tss->io_bitmap, tsk_th->io_bitmap_ptr, + tsk_th->io_bitmap_max); + /* + * If the previously set map was extending to higher ports + * than the current one, pad extra space with 0xff (no access). 
+ */ + if (tsk_th->io_bitmap_max < tss->map_size) + memset((char *) tss->io_bitmap + tsk_th->io_bitmap_max, 0xff, + tss->map_size - tsk_th->io_bitmap_max); + tss->map_size = tsk_th->io_bitmap_max; + tss->io_bitmap_base = IO_BITMAP_OFFSET; + put_cpu(); + return; + } + put_cpu(); + if (regs->eflags & VM_MASK) goto gp_in_vm86; diff -puN include/asm-i386/processor.h~lazy-tsss-i-o-bitmap-copy-for-i386 include/asm-i386/processor.h --- 25/include/asm-i386/processor.h~lazy-tsss-i-o-bitmap-copy-for-i386 2004-09-03 22:54:46.586091144 -0700 +++ 25-akpm/include/asm-i386/processor.h 2004-09-03 22:54:46.594089928 -0700 @@ -307,6 +307,7 @@ extern unsigned int mca_pentium_flag; #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 +#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 struct i387_fsave_struct { long cwd; @@ -392,9 +393,13 @@ struct tss_struct { */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; /* + * Effective size of the currently set I/O bitmap. + */ + unsigned long map_size; + /* * pads the TSS to be cacheline-aligned (size is 0x100) */ - unsigned long __cacheline_filler[37]; + unsigned long __cacheline_filler[36]; /* * .. and then another 0x100 bytes for emergency kernel stack */ _