From: Alan Hourihane AGP allocation/deallocation is suffering major performance issues because global_flush_tlb() is called on every change_page_attr() call. For small allocations this isn't really seen, but when you start allocating 50000 pages of AGP space for, say, texture memory, things can take seconds to complete. In some cases the time taken is doubled or even quadrupled due to SMP, or due to a deallocation followed by a new reallocation. I've had a case of up to 20 seconds of wait time to deallocate and reallocate AGP space. This patch fixes the problem by making it the caller's responsibility to call global_flush_tlb(), so the flush is no longer performed for every page mapped into AGP space but is deferred until all change_page_attr() changes are done. Cc: Dave Jones Signed-off-by: Andrew Morton --- drivers/char/agp/backend.c | 9 +++++++-- drivers/char/agp/generic.c | 11 ++++++++--- drivers/char/agp/i460-agp.c | 9 ++++++--- drivers/char/agp/intel-agp.c | 5 ++++- 4 files changed, 25 insertions(+), 9 deletions(-) diff -puN drivers/char/agp/backend.c~agp-performance-fixes drivers/char/agp/backend.c --- 25/drivers/char/agp/backend.c~agp-performance-fixes Tue Oct 18 15:22:24 2005 +++ 25-akpm/drivers/char/agp/backend.c Tue Oct 18 15:22:24 2005 @@ -147,6 +147,7 @@ static int agp_backend_initialize(struct printk(KERN_ERR PFX "unable to get memory for scratch page.\n"); return -ENOMEM; } + global_flush_tlb(); bridge->scratch_page_real = virt_to_gart(addr); bridge->scratch_page = @@ -187,9 +188,11 @@ static int agp_backend_initialize(struct return 0; err_out: - if (bridge->driver->needs_scratch_page) + if (bridge->driver->needs_scratch_page) { bridge->driver->agp_destroy_page( gart_to_virt(bridge->scratch_page_real)); + global_flush_tlb(); + } if (got_gatt) bridge->driver->free_gatt_table(bridge); if (got_keylist) { @@ -211,9 +214,11 @@ static void agp_backend_cleanup(struct a bridge->key_list = NULL; if (bridge->driver->agp_destroy_page && - 
bridge->driver->needs_scratch_page) + bridge->driver->needs_scratch_page) { bridge->driver->agp_destroy_page( gart_to_virt(bridge->scratch_page_real)); + global_flush_tlb(); + } } /* When we remove the global variable agp_bridge from all drivers diff -puN drivers/char/agp/generic.c~agp-performance-fixes drivers/char/agp/generic.c --- 25/drivers/char/agp/generic.c~agp-performance-fixes Tue Oct 18 15:22:24 2005 +++ 25-akpm/drivers/char/agp/generic.c Tue Oct 18 15:22:24 2005 @@ -57,7 +57,8 @@ int map_page_into_agp(struct page *page) { int i; i = change_page_attr(page, 1, PAGE_KERNEL_NOCACHE); - global_flush_tlb(); + /* Caller's responsibility to call global_flush_tlb() for + * performance reasons */ return i; } EXPORT_SYMBOL_GPL(map_page_into_agp); @@ -66,7 +67,8 @@ int unmap_page_from_agp(struct page *pag { int i; i = change_page_attr(page, 1, PAGE_KERNEL); - global_flush_tlb(); + /* Caller's responsibility to call global_flush_tlb() for + * performance reasons */ return i; } EXPORT_SYMBOL_GPL(unmap_page_from_agp); @@ -155,6 +157,7 @@ void agp_free_memory(struct agp_memory * for (i = 0; i < curr->page_count; i++) { curr->bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[i])); } + global_flush_tlb(); } agp_free_key(curr->key); vfree(curr->memory); @@ -212,7 +215,9 @@ struct agp_memory *agp_allocate_memory(s new->memory[i] = virt_to_gart(addr); new->page_count++; } - new->bridge = bridge; + global_flush_tlb(); + + new->bridge = bridge; flush_agp_mappings(); diff -puN drivers/char/agp/i460-agp.c~agp-performance-fixes drivers/char/agp/i460-agp.c --- 25/drivers/char/agp/i460-agp.c~agp-performance-fixes Tue Oct 18 15:22:24 2005 +++ 25-akpm/drivers/char/agp/i460-agp.c Tue Oct 18 15:22:24 2005 @@ -514,9 +514,10 @@ static void *i460_alloc_page (struct agp { void *page; - if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) { page = agp_generic_alloc_page(agp_bridge); - else + global_flush_tlb(); + } else /* Returning NULL would cause problems 
*/ /* AK: really dubious code. */ page = (void *)~0UL; @@ -525,8 +526,10 @@ static void *i460_alloc_page (struct agp static void i460_destroy_page (void *page) { - if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) { agp_generic_destroy_page(page); + global_flush_tlb(); + } } #endif /* I460_LARGE_IO_PAGES */ diff -puN drivers/char/agp/intel-agp.c~agp-performance-fixes drivers/char/agp/intel-agp.c --- 25/drivers/char/agp/intel-agp.c~agp-performance-fixes Tue Oct 18 15:22:24 2005 +++ 25-akpm/drivers/char/agp/intel-agp.c Tue Oct 18 15:22:24 2005 @@ -270,6 +270,7 @@ static struct agp_memory *alloc_agpphysm switch (pg_count) { case 1: addr = agp_bridge->driver->agp_alloc_page(agp_bridge); + global_flush_tlb(); break; case 4: /* kludge to get 4 physical pages for ARGB cursor */ @@ -330,9 +331,11 @@ static void intel_i810_free_by_type(stru if(curr->type == AGP_PHYS_MEMORY) { if (curr->page_count == 4) i8xx_destroy_pages(gart_to_virt(curr->memory[0])); - else + else { agp_bridge->driver->agp_destroy_page( gart_to_virt(curr->memory[0])); + global_flush_tlb(); + } vfree(curr->memory); } kfree(curr); _