diff --git a/arch/Kconfig b/arch/Kconfig index 550dab2..830c16a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -106,3 +106,5 @@ config HAVE_CLK The calls support software clock gating and thus are a key power management tool on many systems. +config HAVE_DMA_API_DEBUG + bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bc2fbad..f2cb677 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -40,6 +40,7 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select USER_STACKTRACE_SUPPORT + select HAVE_DMA_API_DEBUG config ARCH_DEFCONFIG string diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index e1983fa..443b315 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -168,8 +168,7 @@ config IOMMU_DEBUG config IOMMU_LEAK bool "IOMMU leak tracing" - depends on DEBUG_KERNEL - depends on IOMMU_DEBUG + depends on IOMMU_DEBUG && DMA_API_DEBUG help Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 132a134..2169dd9 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -7,6 +7,8 @@ */ #include +#include +#include #include #include #include @@ -93,9 +95,14 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size, int direction) { struct dma_mapping_ops *ops = get_dma_ops(hwdev); + dma_addr_t addr; BUG_ON(!valid_dma_direction(direction)); - return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); + addr = ops->map_single(hwdev, virt_to_phys(ptr), size, direction); + debug_dma_map_page(hwdev, virt_to_page(ptr), + (unsigned long)ptr & ~PAGE_MASK, size, + direction, addr, true); + return addr; } static inline void @@ -107,6 +114,7 @@ dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(direction)); if (ops->unmap_single) ops->unmap_single(dev, addr, size, direction); + debug_dma_unmap_page(dev, addr, size, direction, true); } static inline int @@ -114,9 +122,13 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { struct dma_mapping_ops *ops = get_dma_ops(hwdev); + int ents; BUG_ON(!valid_dma_direction(direction)); - return ops->map_sg(hwdev, sg, nents, direction); + ents = ops->map_sg(hwdev, sg, nents, direction); + debug_dma_map_sg(hwdev, sg, nents, ents, direction); + + return ents; } static inline void @@ -126,6 +138,7 @@ dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, struct dma_mapping_ops *ops = get_dma_ops(hwdev); BUG_ON(!valid_dma_direction(direction)); + debug_dma_unmap_sg(hwdev, sg, nents, direction); if (ops->unmap_sg) ops->unmap_sg(hwdev, sg, nents, direction); } @@ -139,6 +152,7 @@ dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, BUG_ON(!valid_dma_direction(direction)); if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); + debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -151,6 +165,7 @@ dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, BUG_ON(!valid_dma_direction(direction)); if (ops->sync_single_for_device) ops->sync_single_for_device(hwdev, dma_handle, size, direction); + debug_dma_sync_single_for_device(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -164,6 +179,8 @@ dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, if (ops->sync_single_range_for_cpu) ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction); + debug_dma_sync_single_range_for_cpu(hwdev, dma_handle, + offset, size, direction); flush_write_buffers(); } @@ -178,6 +195,8 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, if (ops->sync_single_range_for_device) ops->sync_single_range_for_device(hwdev, dma_handle, offset, size, direction); + debug_dma_sync_single_range_for_device(hwdev, dma_handle, + offset, size, direction); flush_write_buffers(); } @@ -190,6 +209,8 @@ dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(direction)); if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, direction); + flush_write_buffers(); } @@ -202,6 +223,7 @@ dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(direction)); if (ops->sync_sg_for_device) ops->sync_sg_for_device(hwdev, sg, nelems, direction); + debug_dma_sync_sg_for_device(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -211,16 +233,21 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, int direction) { struct dma_mapping_ops *ops = get_dma_ops(dev); + dma_addr_t addr; BUG_ON(!valid_dma_direction(direction)); - return ops->map_single(dev, page_to_phys(page) + offset, + addr = ops->map_single(dev, page_to_phys(page) + offset, size, direction); + debug_dma_map_page(dev, page, offset, size, direction, addr, false); + + return addr; } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction) { dma_unmap_single(dev, addr, size, direction); + debug_dma_unmap_page(dev, addr, size, direction, false); } static inline void @@ -285,8 +312,11 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!ops->alloc_coherent) return NULL; - return ops->alloc_coherent(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp)); + memory = ops->alloc_coherent(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp)); + debug_dma_alloc_coherent(dev, size, *dma_handle, memory); + + return memory; } static inline void dma_free_coherent(struct device *dev, size_t size, @@ -299,6 +329,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, if (dma_release_from_coherent(dev, get_order(size), vaddr)) return; + debug_dma_free_coherent(dev, size, vaddr, bus); if (ops->free_coherent) ops->free_coherent(dev, size, vaddr, bus); } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b254285..47a8aa4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -44,6 +44,9 @@ struct device x86_dma_fallback_dev = { }; EXPORT_SYMBOL(x86_dma_fallback_dev); +/* Number of entries preallocated for DMA-API debugging */ +#define PREALLOC_DMA_DEBUG_ENTRIES 32768 + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) @@ -265,6 +268,8 @@ EXPORT_SYMBOL(dma_supported); static int __init pci_iommu_init(void) { + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + calgary_iommu_init(); intel_iommu_init(); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d5768b1..4497159 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -144,48 +144,21 @@ static void flush_gart(void) } #ifdef CONFIG_IOMMU_LEAK - -#define SET_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0);\ - } while (0) - -#define CLEAR_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; \ - } while (0) - /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; static void dump_leak(void) { - int i; static int dump; - if (dump || !iommu_leak_tab) + if (dump) return; dump = 1; - show_stack(NULL, NULL); - /* Very crude. dump some from the end of the table too */ - printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", - iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i += 2) { - printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], - 0); - printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk(KERN_DEBUG "\n"); + show_stack(NULL, NULL); + debug_dma_dump_mappings(NULL); } -#else -# define SET_LEAK(x) -# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) @@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); - SET_LEAK(iommu_page + i); phys_mem += PAGE_SIZE; } return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); @@ -290,7 +262,6 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; - CLEAR_LEAK(iommu_page + i); } free_iommu(iommu_page, npages); } @@ -373,7 +344,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); - SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } @@ -797,11 +767,12 @@ void __init gart_iommu_init(void) #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - get_order(iommu_pages*sizeof(void *))); - if (!iommu_leak_tab) + int ret; + + ret = dma_debug_resize_entries(iommu_pages); + if (ret) printk(KERN_DEBUG - "PCI-DMA: Cannot allocate leak trace area\n"); + "PCI-DMA: Cannot trace all the entries\n"); } #endif diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h new file mode 100644 index 0000000..171ad8a --- /dev/null +++ b/include/linux/dma-debug.h @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __DMA_DEBUG_H +#define __DMA_DEBUG_H + +#include + +struct device; +struct scatterlist; +struct bus_type; + +#ifdef CONFIG_DMA_API_DEBUG + +extern void dma_debug_add_bus(struct bus_type *bus); + +extern void dma_debug_init(u32 num_entries); + +extern int dma_debug_resize_entries(u32 num_entries); + +extern void debug_dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction, dma_addr_t dma_addr, + bool map_single); + +extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, bool map_single); + +extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction); + +extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir); + +extern void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt); + +extern void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr); + +extern void debug_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + int direction); + +extern void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction); + +extern void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + int direction); + +extern void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, int direction); + +extern void debug_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, + int nelems, int direction); + +extern void debug_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, + int nelems, int direction); + +extern void debug_dma_dump_mappings(struct device *dev); + +#else /* CONFIG_DMA_API_DEBUG */ + +static inline void dma_debug_add_bus(struct bus_type *bus) +{ +} + +static inline void dma_debug_init(u32 num_entries) +{ +} + +static inline int dma_debug_resize_entries(u32 num_entries) +{ + return 0; +} + +static inline void debug_dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction, dma_addr_t dma_addr, + bool map_single) +{ +} + +static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, + bool map_single) +{ +} + +static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction) +{ +} + +static inline void debug_dma_unmap_sg(struct device *dev, + struct scatterlist *sglist, + int nelems, int dir) +{ +} + +static inline void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt) +{ +} + +static inline void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr) +{ +} + +static inline void debug_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction) +{ +} + +static inline void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction) +{ +} + +static inline void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + int direction) +{ +} + +static inline void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + int direction) +{ +} + +static inline void debug_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, + int nelems, int direction) +{ +} + +static inline void debug_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, + int nelems, int direction) +{ +} + +static inline void debug_dma_dump_mappings(struct device *dev) +{ +} + +#endif /* CONFIG_DMA_API_DEBUG */ + +#endif /* __DMA_DEBUG_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1bcf9cd..d9cbada 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -902,6 +902,17 @@ config DYNAMIC_PRINTK_DEBUG debugging for all modules. This mode can be turned off via the above disable command. +config DMA_API_DEBUG + bool "Enable debugging of DMA-API usage" + depends on HAVE_DMA_API_DEBUG + help + Enable this option to debug the use of the DMA API by device drivers. + With this option you will be able to detect common bugs in device + drivers like double-freeing of DMA mappings or freeing mappings that + were never allocated. + This option causes a performance degredation. Use only if you want + to debug device drivers. If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 32b0e64..50b48cf 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -84,6 +84,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/dma-debug.c b/lib/dma-debug.c new file mode 100644 index 0000000..cdd205d --- /dev/null +++ b/lib/dma-debug.c @@ -0,0 +1,964 @@ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define HASH_SIZE 1024ULL +#define HASH_FN_SHIFT 13 +#define HASH_FN_MASK (HASH_SIZE - 1) + +enum { + dma_debug_single, + dma_debug_page, + dma_debug_sg, + dma_debug_coherent, +}; + +#define DMA_DEBUG_STACKTRACE_ENTRIES 5 + +struct dma_debug_entry { + struct list_head list; + struct device *dev; + int type; + phys_addr_t paddr; + u64 dev_addr; + u64 size; + int direction; + int sg_call_ents; + int sg_mapped_ents; +#ifdef CONFIG_STACKTRACE + struct stack_trace stacktrace; + unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; +#endif +}; + +struct hash_bucket { + struct list_head list; + spinlock_t lock; +} ____cacheline_aligned_in_smp; + +/* Hash list to save the allocated dma addresses */ +static struct hash_bucket dma_entry_hash[HASH_SIZE]; +/* List of pre-allocated dma_debug_entry's */ +static LIST_HEAD(free_entries); +/* Lock for the list above */ +static DEFINE_SPINLOCK(free_entries_lock); + +/* Global disable flag - will be set in case of an error */ +static bool global_disable __read_mostly; + +/* Global error count */ +static u32 error_count; + +/* Global error show enable*/ +static u32 show_all_errors __read_mostly; +/* Number of errors to show */ +static u32 show_num_errors = 1; + +static u32 num_free_entries; +static u32 min_free_entries; +static u32 nr_total_entries; + +/* number of preallocated entries requested by kernel cmdline */ +static u32 req_entries; + +/* debugfs dentry's for the stuff above */ +static struct dentry *dma_debug_dent __read_mostly; +static struct dentry *global_disable_dent __read_mostly; +static struct dentry *error_count_dent __read_mostly; +static struct dentry *show_all_errors_dent __read_mostly; +static struct dentry *show_num_errors_dent __read_mostly; +static struct dentry *num_free_entries_dent __read_mostly; +static struct dentry *min_free_entries_dent __read_mostly; + +static const char *type2name[4] = { "single", "page", + "scather-gather", "coherent" }; + +static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", + "DMA_FROM_DEVICE", "DMA_NONE" }; + +/* + * The access to some variables in this macro is racy. We can't use atomic_t + * here because all these variables are exported to debugfs. Some of them even + * writeable. This is also the reason why a lock won't help much. But anyway, + * the races are no big deal. Here is why: + * + * error_count: the addition is racy, but the worst thing that can happen is + * that we don't count some errors + * show_num_errors: the subtraction is racy. Also no big deal because in + * worst case this will result in one warning more in the + * system log than the user configured. This variable is + * writeable via debugfs. + */ +static inline void dump_entry_trace(struct dma_debug_entry *entry) +{ +#ifdef CONFIG_STACKTRACE + if (entry) { + printk(KERN_WARNING "Mapped at:\n"); + print_stack_trace(&entry->stacktrace, 0); + } +#endif +} + +#define err_printk(dev, entry, format, arg...) do { \ + error_count += 1; \ + if (show_all_errors || show_num_errors > 0) { \ + WARN(1, "%s %s: " format, \ + dev_driver_string(dev), \ + dev_name(dev) , ## arg); \ + dump_entry_trace(entry); \ + } \ + if (!show_all_errors && show_num_errors > 0) \ + show_num_errors -= 1; \ + } while (0); + +/* + * Hash related functions + * + * Every DMA-API request is saved into a struct dma_debug_entry. To + * have quick access to these structs they are stored into a hash. + */ +static int hash_fn(struct dma_debug_entry *entry) +{ + /* + * Hash function is based on the dma address. + * We use bits 20-27 here as the index into the hash + */ + return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; +} + +/* + * Request exclusive access to a hash bucket for a given dma_debug_entry. + */ +static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, + unsigned long *flags) +{ + int idx = hash_fn(entry); + unsigned long __flags; + + spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); + *flags = __flags; + return &dma_entry_hash[idx]; +} + +/* + * Give up exclusive access to the hash bucket + */ +static void put_hash_bucket(struct hash_bucket *bucket, + unsigned long *flags) +{ + unsigned long __flags = *flags; + + spin_unlock_irqrestore(&bucket->lock, __flags); +} + +/* + * Search a given entry in the hash bucket list + */ +static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, + struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + + list_for_each_entry(entry, &bucket->list, list) { + if ((entry->dev_addr == ref->dev_addr) && + (entry->dev == ref->dev)) + return entry; + } + + return NULL; +} + +/* + * Add an entry to a hash bucket + */ +static void hash_bucket_add(struct hash_bucket *bucket, + struct dma_debug_entry *entry) +{ + list_add_tail(&entry->list, &bucket->list); +} + +/* + * Remove entry from a hash bucket list + */ +static void hash_bucket_del(struct dma_debug_entry *entry) +{ + list_del(&entry->list); +} + +/* + * Dump mapping entries for debugging purposes + */ +void debug_dma_dump_mappings(struct device *dev) +{ + int idx; + + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&bucket->lock, flags); + + list_for_each_entry(entry, &bucket->list, list) { + if (!dev || dev == entry->dev) { + dev_info(entry->dev, + "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", + type2name[entry->type], idx, + (unsigned long long)entry->paddr, + entry->dev_addr, entry->size, + dir2name[entry->direction]); + } + } + + spin_unlock_irqrestore(&bucket->lock, flags); + } +} +EXPORT_SYMBOL(debug_dma_dump_mappings); + +/* + * Wrapper function for adding an entry to the hash. + * This function takes care of locking itself. + */ +static void add_dma_entry(struct dma_debug_entry *entry) +{ + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(entry, &flags); + hash_bucket_add(bucket, entry); + put_hash_bucket(bucket, &flags); +} + +static struct dma_debug_entry *__dma_entry_alloc(void) +{ + struct dma_debug_entry *entry; + + entry = list_entry(free_entries.next, struct dma_debug_entry, list); + list_del(&entry->list); + memset(entry, 0, sizeof(*entry)); + + num_free_entries -= 1; + if (num_free_entries < min_free_entries) + min_free_entries = num_free_entries; + + return entry; +} + +/* struct dma_entry allocator + * + * The next two functions implement the allocator for + * struct dma_debug_entries. + */ +static struct dma_debug_entry *dma_entry_alloc(void) +{ + struct dma_debug_entry *entry = NULL; + unsigned long flags; + + spin_lock_irqsave(&free_entries_lock, flags); + + if (list_empty(&free_entries)) { + printk(KERN_ERR "DMA-API: debugging out of memory " + "- disabling\n"); + global_disable = true; + goto out; + } + + entry = __dma_entry_alloc(); + +#ifdef CONFIG_STACKTRACE + entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; + entry->stacktrace.entries = entry->st_entries; + entry->stacktrace.skip = 2; + save_stack_trace(&entry->stacktrace); +#endif + +out: + spin_unlock_irqrestore(&free_entries_lock, flags); + + return entry; +} + +static void dma_entry_free(struct dma_debug_entry *entry) +{ + unsigned long flags; + + /* + * add to beginning of the list - this way the entries are + * more likely cache hot when they are reallocated. + */ + spin_lock_irqsave(&free_entries_lock, flags); + list_add(&entry->list, &free_entries); + num_free_entries += 1; + spin_unlock_irqrestore(&free_entries_lock, flags); +} + +int dma_debug_resize_entries(u32 num_entries) +{ + int i, delta, ret = 0; + unsigned long flags; + struct dma_debug_entry *entry; + LIST_HEAD(tmp); + + spin_lock_irqsave(&free_entries_lock, flags); + + if (nr_total_entries < num_entries) { + delta = num_entries - nr_total_entries; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + for (i = 0; i < delta; i++) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + break; + + list_add_tail(&entry->list, &tmp); + } + + spin_lock_irqsave(&free_entries_lock, flags); + + list_splice(&tmp, &free_entries); + nr_total_entries += i; + num_free_entries += i; + } else { + delta = nr_total_entries - num_entries; + + for (i = 0; i < delta && !list_empty(&free_entries); i++) { + entry = __dma_entry_alloc(); + kfree(entry); + } + + nr_total_entries -= i; + } + + if (nr_total_entries != num_entries) + ret = 1; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_debug_resize_entries); + +/* + * DMA-API debugging init code + * + * The init code does two things: + * 1. Initialize core data structures + * 2. Preallocate a given number of dma_debug_entry structs + */ + +static int prealloc_memory(u32 num_entries) +{ + struct dma_debug_entry *entry, *next_entry; + int i; + + for (i = 0; i < num_entries; ++i) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out_err; + + list_add_tail(&entry->list, &free_entries); + } + + num_free_entries = num_entries; + min_free_entries = num_entries; + + printk(KERN_INFO "DMA-API: preallocated %d debug entries\n", + num_entries); + + return 0; + +out_err: + + list_for_each_entry_safe(entry, next_entry, &free_entries, list) { + list_del(&entry->list); + kfree(entry); + } + + return -ENOMEM; +} + +static int dma_debug_fs_init(void) +{ + dma_debug_dent = debugfs_create_dir("dma-api", NULL); + if (!dma_debug_dent) { + printk(KERN_ERR "DMA-API: can not create debugfs directory\n"); + return -ENOMEM; + } + + global_disable_dent = debugfs_create_bool("disabled", 0444, + dma_debug_dent, + (u32 *)&global_disable); + if (!global_disable_dent) + goto out_err; + + error_count_dent = debugfs_create_u32("error_count", 0444, + dma_debug_dent, &error_count); + if (!error_count_dent) + goto out_err; + + show_all_errors_dent = debugfs_create_u32("all_errors", 0644, + dma_debug_dent, + &show_all_errors); + if (!show_all_errors_dent) + goto out_err; + + show_num_errors_dent = debugfs_create_u32("num_errors", 0644, + dma_debug_dent, + &show_num_errors); + if (!show_num_errors_dent) + goto out_err; + + num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, + dma_debug_dent, + &num_free_entries); + if (!num_free_entries_dent) + goto out_err; + + min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, + dma_debug_dent, + &min_free_entries); + if (!min_free_entries_dent) + goto out_err; + + return 0; + +out_err: + debugfs_remove_recursive(dma_debug_dent); + + return -ENOMEM; +} + +void dma_debug_add_bus(struct bus_type *bus) +{ + /* FIXME: register notifier */ +} + +/* + * Let the architectures decide how many entries should be preallocated. + */ +void dma_debug_init(u32 num_entries) +{ + int i; + + if (global_disable) + return; + + for (i = 0; i < HASH_SIZE; ++i) { + INIT_LIST_HEAD(&dma_entry_hash[i].list); + dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; + } + + if (dma_debug_fs_init() != 0) { + printk(KERN_ERR "DMA-API: error creating debugfs entries " + "- disabling\n"); + global_disable = true; + + return; + } + + if (req_entries) + num_entries = req_entries; + + if (prealloc_memory(num_entries) != 0) { + printk(KERN_ERR "DMA-API: debugging out of memory error " + "- disabled\n"); + global_disable = true; + + return; + } + + nr_total_entries = num_free_entries; + + printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); +} + +static __init int dma_debug_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (strncmp(str, "off", 3) == 0) { + printk(KERN_INFO "DMA-API: debugging disabled on kernel " + "command line\n"); + global_disable = true; + } + + return 0; +} + +static __init int dma_debug_entries_cmdline(char *str) +{ + int res; + + if (!str) + return -EINVAL; + + res = get_option(&str, &req_entries); + + if (!res) + req_entries = 0; + + return 0; +} + +__setup("dma_debug=", dma_debug_cmdline); +__setup("dma_debug_entries=", dma_debug_entries_cmdline); + +static void check_unmap(struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + if (dma_mapping_error(ref->dev, ref->dev_addr)) { + err_printk(ref->dev, NULL, "DMA-API: device driver tries " + "to free an invalid DMA memory address\n"); + return; + } + + bucket = get_hash_bucket(ref, &flags); + entry = hash_bucket_find(bucket, ref); + + if (!entry) { + err_printk(ref->dev, NULL, "DMA-API: device driver tries " + "to free DMA memory it has not allocated " + "[device address=0x%016llx] [size=%llu bytes]\n", + ref->dev_addr, ref->size); + goto out; + } + + if (ref->size != entry->size) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different size " + "[device address=0x%016llx] [map size=%llu bytes] " + "[unmap size=%llu bytes]\n", + ref->dev_addr, entry->size, ref->size); + } + + if (ref->type != entry->type) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with wrong function " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s] [unmapped as %s]\n", + ref->dev_addr, ref->size, + type2name[entry->type], type2name[ref->type]); + } else if ((entry->type == dma_debug_coherent) && + (ref->paddr != entry->paddr)) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different CPU address " + "[device address=0x%016llx] [size=%llu bytes] " + "[cpu alloc address=%p] [cpu free address=%p]", + ref->dev_addr, ref->size, + (void *)entry->paddr, (void *)ref->paddr); + } + + if (ref->sg_call_ents && ref->type == dma_debug_sg && + ref->sg_call_ents != entry->sg_call_ents) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA sg list with different entry count " + "[map count=%d] [unmap count=%d]\n", + entry->sg_call_ents, ref->sg_call_ents); + } + + /* + * This may be no bug in reality - but most implementations of the + * DMA API don't handle this properly, so check for it here + */ + if (ref->direction != entry->direction) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [unmapped with %s]\n", + ref->dev_addr, ref->size, + dir2name[entry->direction], + dir2name[ref->direction]); + } + + hash_bucket_del(entry); + dma_entry_free(entry); + +out: + put_hash_bucket(bucket, &flags); +} + +static void check_for_stack(struct device *dev, void *addr) +{ + if (object_is_on_stack(addr)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from" + "stack [addr=%p]\n", addr); +} + +static inline bool overlap(void *addr, u64 size, void *start, void *end) +{ + void *addr2 = (char *)addr + size; + + return ((addr >= start && addr < end) || + (addr2 >= start && addr2 < end) || + ((addr < start) && (addr2 >= end))); +} + +static void check_for_illegal_area(struct device *dev, void *addr, u64 size) +{ + if (overlap(addr, size, _text, _etext) || + overlap(addr, size, __start_rodata, __end_rodata)) + err_printk(dev, NULL, "DMA-API: device driver maps " + "memory from kernel text or rodata " + "[addr=%p] [size=%llu]\n", addr, size); +} + +static void check_sync(struct device *dev, dma_addr_t addr, + u64 size, u64 offset, int direction, bool to_cpu) +{ + struct dma_debug_entry ref = { + .dev = dev, + .dev_addr = addr, + .size = size, + .direction = direction, + }; + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(&ref, &flags); + + entry = hash_bucket_find(bucket, &ref); + + if (!entry) { + err_printk(dev, NULL, "DMA-API: device driver tries " + "to sync DMA memory it has not allocated " + "[device address=0x%016llx] [size=%llu bytes]\n", + (unsigned long long)addr, size); + goto out; + } + + if ((offset + size) > entry->size) { + err_printk(dev, entry, "DMA-API: device driver syncs" + " DMA memory outside allocated range " + "[device address=0x%016llx] " + "[allocation size=%llu bytes] [sync offset=%llu] " + "[sync size=%llu]\n", entry->dev_addr, entry->size, + offset, size); + } + + if (direction != entry->direction) { + err_printk(dev, entry, "DMA-API: device driver syncs " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)addr, entry->size, + dir2name[entry->direction], + dir2name[direction]); + } + + if (entry->direction == DMA_BIDIRECTIONAL) + goto out; + + if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && + !(direction == DMA_TO_DEVICE)) + err_printk(dev, entry, "DMA-API: device driver syncs " + "device read-only DMA memory for cpu " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)addr, entry->size, + dir2name[entry->direction], + dir2name[direction]); + + if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && + !(direction == DMA_FROM_DEVICE)) + err_printk(dev, entry, "DMA-API: device driver syncs " + "device write-only DMA memory to device " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)addr, entry->size, + dir2name[entry->direction], + dir2name[direction]); + +out: + put_hash_bucket(bucket, &flags); + +} + +void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, + size_t size, int direction, dma_addr_t dma_addr, + bool map_single) +{ + struct dma_debug_entry *entry; + + if (unlikely(global_disable)) + return; + + if (unlikely(dma_mapping_error(dev, dma_addr))) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->dev = dev; + entry->type = dma_debug_page; + entry->paddr = page_to_phys(page) + offset; + entry->dev_addr = dma_addr; + entry->size = size; + entry->direction = direction; + + if (map_single) + entry->type = dma_debug_single; + + if (!PageHighMem(page)) { + void *addr = ((char *)page_address(page)) + offset; + check_for_stack(dev, addr); + check_for_illegal_area(dev, addr, size); + } + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_map_page); + +void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, bool map_single) +{ + struct dma_debug_entry ref = { + .type = dma_debug_page, + .dev = dev, + .dev_addr = addr, + .size = size, + .direction = direction, + }; + + if (unlikely(global_disable)) + return; + + if (map_single) + ref.type = dma_debug_single; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_unmap_page); + +void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction) +{ + struct dma_debug_entry *entry; + struct scatterlist *s; + int i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, mapped_ents, i) { + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_sg; + entry->dev = dev; + entry->paddr = sg_phys(s); + entry->size = s->length; + entry->dev_addr = s->dma_address; + entry->direction = direction; + entry->sg_call_ents = nents; + entry->sg_mapped_ents = mapped_ents; + + if (!PageHighMem(sg_page(s))) { + check_for_stack(dev, sg_virt(s)); + check_for_illegal_area(dev, sg_virt(s), s->length); + } + + add_dma_entry(entry); + } +} +EXPORT_SYMBOL(debug_dma_map_sg); + +void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + struct dma_debug_entry *entry; + struct scatterlist *s; + int mapped_ents = 0, i; + unsigned long flags; + + if (unlikely(global_disable)) + return; + + for_each_sg(sglist, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .paddr = sg_phys(s), + .dev_addr = s->dma_address, + .size = s->length, + .direction = dir, + .sg_call_ents = 0, + }; + + if (mapped_ents && i >= mapped_ents) + break; + + if (mapped_ents == 0) { + struct hash_bucket *bucket; + ref.sg_call_ents = nelems; + bucket = get_hash_bucket(&ref, &flags); + entry = hash_bucket_find(bucket, &ref); + if (entry) + mapped_ents = entry->sg_mapped_ents; + put_hash_bucket(bucket, &flags); + } + + check_unmap(&ref); + } +} +EXPORT_SYMBOL(debug_dma_unmap_sg); + +void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt) +{ + struct dma_debug_entry *entry; + + if (unlikely(global_disable)) + return; + + if (unlikely(virt == NULL)) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_coherent; + entry->dev = dev; + entry->paddr = virt_to_phys(virt); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = DMA_BIDIRECTIONAL; + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_alloc_coherent); + +void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr) +{ + struct dma_debug_entry ref = { + .type = dma_debug_coherent, + .dev = dev, + .paddr = virt_to_phys(virt), + .dev_addr = addr, + .size = size, + .direction = DMA_BIDIRECTIONAL, + }; + + if (unlikely(global_disable)) + return; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_free_coherent); + +void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, 0, direction, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); + +void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, 0, direction, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_device); + +void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, size_t size, + int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, offset, direction, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); + +void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, offset, direction, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); + +void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, nelems, i) { + check_sync(dev, s->dma_address, s->dma_length, 0, + direction, true); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); + +void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, nelems, i) { + check_sync(dev, s->dma_address, s->dma_length, 0, + direction, false); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_device); +