GIT df80675d844271d7fd9f86b136fe5ce7b210eefd git+ssh://master.kernel.org/pub/scm/linux/kernel/git/jgarzik/misc-2.6.git#pciseg commit Author: Jeff Garzik Date: Sat Sep 30 23:41:07 2006 -0400 [x86] PCI domains: free memory on error Fixes memory leak on error. Signed-off-by: Jeff Garzik commit 3820f066a80fd641ea9df49a7d0ed4c443d15ea5 Author: Muli Ben-Yehuda Date: Sat Sep 30 20:51:48 2006 +0300 [PATCH] x86-64: Calgary IOMMU: update to work with PCI domains On Sat, Sep 30, 2006 at 07:41:50AM -0400, Jeff Garzik wrote: > Muli Ben-Yehuda wrote: > >The patch I posted earlier is all that's needed, if you could merge it > >into #pciseg that would be fine. I'm pondering making one small > >change though: in your pci domains patch, you have this snippet: > > Would you be kind enough to resend the patch with a proper Signed-off-by > line? (and subject/description, etc. while you're at it) This patch updates Calgary to work with Jeff's PCI domains code by moving each bus's pointer to its struct iommu_table to struct pci_sysdata, rather than stashing it in ->sysdata directly. Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Jeff Garzik commit 37d8fe438d9922cca166c30fff42b517db2ddbf2 Author: Jeff Garzik Date: Wed Sep 27 03:48:43 2006 -0400 [PCI] add PCI domain support to x86-64 Kconfig Signed-off-by: Jeff Garzik commit 67a49a975cd9035c8929bf4f0d9d1d13cff7faf3 Author: Jeff Garzik Date: Fri Dec 2 20:30:48 2005 -0500 [x86, PCI] add PCI domain support * Store PCI domain in struct pci_sysdata * Add magic inlines to pass PCI domain to read/write config hooks * Support ACPI's notion of PCI domains (PCI segments) commit 0a247a58fc3e2ecfc17654301033e8b8d08df2a2 Author: Jeff Garzik Date: Fri Dec 2 20:12:52 2005 -0500 [x86, PCI] Switch pci_bus::sysdata from NUMA node integer to a pointer On x86[-64], struct pci_bus::sysdata is only used on NUMA platforms, to store the associated NUMA node. Preparing for the future when we'll want to do other things with sysdata, struct pci_sysdata was created. An allocated structure replaces the cast-pointer-to-int NUMA usage. Updated all NUMA users. commit 9a74cdc338a6e1cc9b2263be4044cba92cb10aaf Author: Jeff Garzik Date: Fri Dec 2 19:20:52 2005 -0500 [x86, PCI] pass PCI domain number to PCI config read/write hooks Don't hardcode zero, since modern x86 (with special ACPI sauce) can support multiple "PCI segments", aka PCI domains. arch/i386/pci/acpi.c | 27 +++++++++++++++++++++++---- arch/i386/pci/common.c | 19 ++++++++++++++++--- arch/x86_64/Kconfig | 4 ++++ arch/x86_64/kernel/pci-calgary.c | 36 ++++++++++++++++++------------------ arch/x86_64/kernel/tce.c | 12 ++++-------- arch/x86_64/pci/k8-bus.c | 6 +++++- include/asm-i386/pci.h | 19 +++++++++++++++++++ include/asm-i386/topology.h | 2 +- include/asm-x86_64/pci.h | 33 +++++++++++++++++++++++++++++++++ include/asm-x86_64/topology.h | 2 +- 10 files changed, 124 insertions(+), 36 deletions(-) diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c index b33aea8..7060b0d 100644 --- a/arch/i386/pci/acpi.c +++ b/arch/i386/pci/acpi.c @@ -8,20 +8,39 @@ #include "pci.h" struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) { struct pci_bus *bus; + struct pci_sysdata *sd; + /* Allocate per-root-bus (not per bus) arch-specific data. + * TODO: leak; this memory is never freed. + * It's arguable whether it's worth the trouble to care. + */ + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) { + printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + return NULL; + } + +#ifdef CONFIG_PCI_DOMAINS + sd->domain = domain; +#else if (domain != 0) { printk(KERN_WARNING "PCI: Multiple domains not supported\n"); + kfree(sd); return NULL; } +#endif /* CONFIG_PCI_DOMAINS */ + + bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + if (!bus) + kfree(sd); - bus = pcibios_scan_root(busnum); #ifdef CONFIG_ACPI_NUMA if (bus != NULL) { int pxm = acpi_get_pxm(device->handle); if (pxm >= 0) { - bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm); - printk("bus %d -> pxm %d -> node %ld\n", - busnum, pxm, (long)(bus->sysdata)); + sd->node = pxm_to_node(pxm); + printk("bus %d -> pxm %d -> node %d\n", + busnum, pxm, sd->node); } } #endif diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index 6d5ace8..aa0ac44 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -29,12 +29,14 @@ struct pci_raw_ops *raw_pci_ops; static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) { - return raw_pci_ops->read(0, bus->number, devfn, where, size, value); + return raw_pci_ops->read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); } static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) { - return raw_pci_ops->write(0, bus->number, devfn, where, size, value); + return raw_pci_ops->write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); } struct pci_ops pci_root_ops = { @@ -197,6 +199,7 @@ #endif /* __i386__ */ struct pci_bus * __devinit pcibios_scan_root(int busnum) { struct pci_bus *bus = NULL; + struct pci_sysdata *sd; dmi_check_system(pciprobe_dmi_table); @@ -207,9 +210,19 @@ struct pci_bus * __devinit pcibios_scan_ } } + /* Allocate per-root-bus (not per bus) arch-specific data. + * TODO: leak; this memory is never freed. + * It's arguable whether it's worth the trouble to care. + */ + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) { + printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + return NULL; + } + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); - return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL); + return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); } extern u8 pci_cache_line_size; diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 010d226..894f63c 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -635,6 +635,10 @@ config PCI_MMCONFIG bool "Support mmconfig PCI config space access" depends on PCI && ACPI +config PCI_DOMAINS + bool "PCI domain support" + depends on PCI + source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index b3296cc..9cb7927 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -316,7 +316,7 @@ void calgary_unmap_sg(struct device *dev int nelems, int direction) { unsigned long flags; - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus); if (!translate_phb(to_pci_dev(dev))) return; @@ -345,7 +345,7 @@ static int calgary_nontranslate_map_sg(s int calgary_map_sg(struct device *dev, struct scatterlist *sg, int nelems, int direction) { - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus); unsigned long flags; unsigned long vaddr; unsigned int npages; @@ -399,7 +399,7 @@ dma_addr_t calgary_map_single(struct dev dma_addr_t dma_handle = bad_dma_address; unsigned long uaddr; unsigned int npages; - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus); uaddr = (unsigned long)vaddr; npages = num_dma_pages(uaddr, size); @@ -415,7 +415,7 @@ dma_addr_t calgary_map_single(struct dev void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) { - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus); unsigned int npages; if (!translate_phb(to_pci_dev(dev))) @@ -433,7 +433,7 @@ void* calgary_alloc_coherent(struct devi unsigned int npages, order; struct iommu_table *tbl; - tbl = to_pci_dev(dev)->bus->self->sysdata; + tbl = pci_iommu(to_pci_dev(dev)->bus); size = PAGE_ALIGN(size); /* size rounded up to full pages */ npages = size >> PAGE_SHIFT; @@ -550,7 +550,7 @@ static void __init calgary_reserve_mem_r limit++; numpages = ((limit - start) >> PAGE_SHIFT); - iommu_range_reserve(dev->sysdata, start, numpages); + iommu_range_reserve(pci_iommu(dev->bus), start, numpages); } static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev) @@ -558,7 +558,7 @@ static void __init calgary_reserve_perip void __iomem *target; u64 low, high, sizelow; u64 start, limit; - struct iommu_table *tbl = dev->sysdata; + struct iommu_table *tbl = pci_iommu(dev->bus); unsigned char busnum = dev->bus->number; void __iomem *bbar = tbl->bbar; @@ -582,7 +582,7 @@ static void __init calgary_reserve_perip u32 val32; u64 low, high, sizelow, sizehigh; u64 start, limit; - struct iommu_table *tbl = dev->sysdata; + struct iommu_table *tbl = pci_iommu(dev->bus); unsigned char busnum = dev->bus->number; void __iomem *bbar = tbl->bbar; @@ -620,7 +620,7 @@ static void __init calgary_reserve_regio void __iomem *bbar; unsigned char busnum; u64 start; - struct iommu_table *tbl = dev->sysdata; + struct iommu_table *tbl = pci_iommu(dev->bus); bbar = tbl->bbar; busnum = dev->bus->number; @@ -651,7 +651,7 @@ static int __init calgary_setup_tar(stru if (ret) return ret; - tbl = dev->sysdata; + tbl = pci_iommu(dev->bus); tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; tce_free(tbl, 0, tbl->it_size); @@ -664,7 +664,7 @@ static int __init calgary_setup_tar(stru /* zero out all TAR bits under sw control */ val64 &= ~TAR_SW_BITS; - tbl = dev->sysdata; + tbl = pci_iommu(dev->bus); table_phys = (u64)__pa(tbl->it_base); val64 |= table_phys; @@ -681,7 +681,7 @@ static int __init calgary_setup_tar(stru static void __init calgary_free_bus(struct pci_dev *dev) { u64 val64; - struct iommu_table *tbl = dev->sysdata; + struct iommu_table *tbl = pci_iommu(dev->bus); void __iomem *target; unsigned int bitmapsz; @@ -696,7 +696,8 @@ static void __init calgary_free_bus(stru tbl->it_map = NULL; kfree(tbl); - dev->sysdata = NULL; + + set_pci_iommu(dev->bus, NULL); /* Can't free bootmem allocated memory after system is up :-( */ bus_info[dev->bus->number].tce_space = NULL; @@ -705,7 +706,7 @@ static void __init calgary_free_bus(stru static void calgary_watchdog(unsigned long data) { struct pci_dev *dev = (struct pci_dev *)data; - struct iommu_table *tbl = dev->sysdata; + struct iommu_table *tbl = pci_iommu(dev->bus); void __iomem *bbar = tbl->bbar; u32 val32; void __iomem *target; @@ -741,7 +742,7 @@ static void __init calgary_enable_transl struct iommu_table *tbl; busnum = dev->bus->number; - tbl = dev->sysdata; + tbl = pci_iommu(dev->bus); bbar = tbl->bbar; /* enable TCE in PHB Config Register */ @@ -771,7 +772,7 @@ static void __init calgary_disable_trans struct iommu_table *tbl; busnum = dev->bus->number; - tbl = dev->sysdata; + tbl = pci_iommu(dev->bus); bbar = tbl->bbar; /* disable TCE in PHB Config Register */ @@ -816,8 +817,7 @@ static inline unsigned int __init locate static void __init calgary_init_one_nontraslated(struct pci_dev *dev) { pci_dev_get(dev); - dev->sysdata = NULL; - dev->bus->self = dev; + set_pci_iommu(dev->bus, NULL); } static int __init calgary_init_one(struct pci_dev *dev) diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c index f61fb8e..3aeae2f 100644 --- a/arch/x86_64/kernel/tce.c +++ b/arch/x86_64/kernel/tce.c @@ -136,9 +136,9 @@ int build_tce_table(struct pci_dev *dev, struct iommu_table *tbl; int ret; - if (dev->sysdata) { - printk(KERN_ERR "Calgary: dev %p has sysdata %p\n", - dev, dev->sysdata); + if (pci_iommu(dev->bus)) { + printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n", + dev, pci_iommu(dev->bus)); BUG(); } @@ -155,11 +155,7 @@ int build_tce_table(struct pci_dev *dev, tbl->bbar = bbar; - /* - * NUMA is already using the bus's sysdata pointer, so we use - * the bus's pci_dev's sysdata instead. - */ - dev->sysdata = tbl; + set_pci_iommu(dev->bus, tbl); return 0; diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index 3acf60d..9cc813e 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c @@ -59,6 +59,8 @@ fill_mp_bus_to_cpumask(void) j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); j++) { struct pci_bus *bus; + struct pci_sysdata *sd; + long node = NODE_ID(nid); /* Algorithm a bit dumb, but it shouldn't matter here */ @@ -67,7 +69,9 @@ fill_mp_bus_to_cpumask(void) continue; if (!node_online(node)) node = 0; - bus->sysdata = (void *)node; + + sd = bus->sysdata; + sd->node = node; } } } diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h index 64b6d0b..2c8b5e9 100644 --- a/include/asm-i386/pci.h +++ b/include/asm-i386/pci.h @@ -3,6 +3,25 @@ #define __i386_PCI_H #ifdef __KERNEL__ + +struct pci_sysdata { + int domain; /* PCI domain */ + int node; /* NUMA node */ +}; + +#ifdef CONFIG_PCI_DOMAINS +static inline int pci_domain_nr(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->domain; +} + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} +#endif /* CONFIG_PCI_DOMAINS */ + #include /* for struct page */ /* Can be used to override the logic in pci_scan_bus for skipping diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index 978d095..d49cd66 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -67,7 +67,7 @@ static inline int node_to_first_cpu(int return first_cpu(mask); } -#define pcibus_to_node(bus) ((long) (bus)->sysdata) +#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)) /* sched_domains SD_NODE_INIT for NUMAQ machines */ diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h index 49c5e92..550207f 100644 --- a/include/asm-x86_64/pci.h +++ b/include/asm-x86_64/pci.h @@ -5,6 +5,39 @@ #include #ifdef __KERNEL__ +struct pci_sysdata { + int domain; /* PCI domain */ + int node; /* NUMA node */ + void* iommu; /* IOMMU private data */ +}; + +#ifdef CONFIG_PCI_DOMAINS +static inline int pci_domain_nr(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->domain; +} + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} +#endif /* CONFIG_PCI_DOMAINS */ + +#ifdef CONFIG_CALGARY_IOMMU +static inline void* pci_iommu(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->iommu; +} + +static inline void set_pci_iommu(struct pci_bus *bus, void *val) +{ + struct pci_sysdata *sd = bus->sysdata; + sd->iommu = val; +} +#endif /* CONFIG_CALGARY_IOMMU */ + #include /* for struct page */ /* Can be used to override the logic in pci_scan_bus for skipping diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 5c8f492..5065042 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -22,7 +22,7 @@ #define cpu_to_node(cpu) (cpu_to_node[c #define parent_node(node) (node) #define node_to_first_cpu(node) (first_cpu(node_to_cpumask[node])) #define node_to_cpumask(node) (node_to_cpumask[node]) -#define pcibus_to_node(bus) ((long)(bus->sysdata)) +#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); #define numa_node_id() read_pda(nodenumber)