From: Matthew Wilcox , "Durairaj, Sundarapandian" Here's a rewrite of Sundarapandian Durairaj's patch for accessing extended PCI configuration space. Changes of note: - Forward-ported to 2.6.2-rc2 - Renamed most of the 'Express' to 'MMCONFIG' since that is what we're actually doing (and it would seem to be the same for PCI-X 2.0) - Separate out the mmconfig accesses into its own file rather than lumping them in with direct. Inline the bits from include/asm-i386/pci.h. - Request the memory region we're going to use for MMCONFIG accesses. - Remove the EXPERIMENTAL tag. - Add support in sysfs for the extended config space. - Use i_size in proc_bus_pci_lseek(). - Move cfg_size to where it will pack better in pci_dev. --- arch/i386/Kconfig | 22 +++++--- arch/i386/kernel/acpi/boot.c | 34 ++++++++++++ arch/i386/pci/Makefile | 1 arch/i386/pci/common.c | 9 ++- arch/i386/pci/mmconfig.c | 115 +++++++++++++++++++++++++++++++++++++++++++ arch/i386/pci/pci.h | 3 + drivers/acpi/tables.c | 1 drivers/pci/pci-sysfs.c | 24 +++++++- drivers/pci/pci.c | 2 drivers/pci/probe.c | 17 ++++++ drivers/pci/proc.c | 26 ++++----- include/asm-i386/fixmap.h | 3 + include/linux/acpi.h | 12 ++++ include/linux/pci.h | 2 14 files changed, 246 insertions(+), 25 deletions(-) diff -puN arch/i386/Kconfig~pcix-enhanced arch/i386/Kconfig --- 25/arch/i386/Kconfig~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/arch/i386/Kconfig 2004-01-29 08:56:58.000000000 -0800 @@ -1052,12 +1052,16 @@ config PCI_GOBIOS PCI-based systems don't have any BIOS at all. Linux can also try to detect the PCI hardware directly without using the BIOS. - With this option, you can specify how Linux should detect the PCI - devices. If you choose "BIOS", the BIOS will be used, if you choose - "Direct", the BIOS won't be used, and if you choose "Any", the - kernel will try the direct access method and falls back to the BIOS - if that doesn't work. If unsure, go with the default, which is - "Any". + With this option, you can specify how Linux should detect the + PCI devices. If you choose "BIOS", the BIOS will be used, + if you choose "Direct", the BIOS won't be used, and if you + choose "MMConfig", then PCI Express MMCONFIG will be used. + If you choose "Any", the kernel will try MMCONFIG, then the + direct access method and falls back to the BIOS if that doesn't + work. If unsure, go with the default, which is "Any". + +config PCI_GOMMCONFIG + bool "MMConfig" config PCI_GODIRECT bool "Direct" @@ -1077,6 +1081,12 @@ config PCI_DIRECT depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) default y +config PCI_MMCONFIG + bool + depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY) + select ACPI_BOOT + default y + config PCI_USE_VECTOR bool "Vector-based interrupt indexing" depends on X86_LOCAL_APIC && X86_IO_APIC diff -puN arch/i386/kernel/acpi/boot.c~pcix-enhanced arch/i386/kernel/acpi/boot.c --- 25/arch/i386/kernel/acpi/boot.c~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/arch/i386/kernel/acpi/boot.c 2004-01-29 08:56:58.000000000 -0800 @@ -95,6 +95,27 @@ char *__acpi_map_table(unsigned long phy } +#ifdef CONFIG_PCI_MMCONFIG +static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) +{ + struct acpi_table_mcfg *mcfg; + + if (!phys_addr || !size) + return -EINVAL; + + mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size); + if (!mcfg) { + printk(KERN_WARNING PREFIX "Unable to map MCFG\n"); + return -ENODEV; + } + + if (mcfg->base_address) + pci_mmcfg_base_addr = mcfg->base_address; + + return 0; +} +#endif /* CONFIG_PCI_MMCONFIG */ + #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; @@ -599,6 +620,19 @@ acpi_boot_init (void) #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ +#ifdef CONFIG_PCI_MMCONFIG + result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); + if (!result) { + printk(KERN_WARNING PREFIX "MCFG not present\n"); + return 0; + } else if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing MCFG\n"); + return result; + } else if (result > 1) { + printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n"); + } +#endif /* CONFIG_PCI_MMCONFIG */ + #ifdef CONFIG_X86_LOCAL_APIC if (acpi_lapic && acpi_ioapic) { smp_found_config = 1; diff -puN arch/i386/pci/common.c~pcix-enhanced arch/i386/pci/common.c --- 25/arch/i386/pci/common.c~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/arch/i386/pci/common.c 2004-01-29 08:56:58.000000000 -0800 @@ -20,7 +20,8 @@ extern void pcibios_sort(void); #endif -unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2; +unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | + PCI_PROBE_MMCONF; int pcibios_last_bus = -1; struct pci_bus *pci_root_bus = NULL; @@ -198,6 +199,12 @@ char * __devinit pcibios_setup(char *st return NULL; } #endif +#ifdef CONFIG_PCI_MMCONFIG + else if (!strcmp(str, "nommconf")) { + pci_probe &= ~PCI_PROBE_MMCONF; + return NULL; + } +#endif else if (!strcmp(str, "noacpi")) { acpi_noirq_set(); return NULL; diff -puN arch/i386/pci/Makefile~pcix-enhanced arch/i386/pci/Makefile --- 25/arch/i386/pci/Makefile~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/arch/i386/pci/Makefile 2004-01-29 08:56:58.000000000 -0800 @@ -1,6 +1,7 @@ obj-y := i386.o obj-$(CONFIG_PCI_BIOS) += pcbios.o +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o obj-$(CONFIG_PCI_DIRECT) += direct.o pci-y := fixup.o diff -puN /dev/null arch/i386/pci/mmconfig.c --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/arch/i386/pci/mmconfig.c 2004-01-29 08:56:58.000000000 -0800 @@ -0,0 +1,115 @@ +/* + * mmconfig.c - Low-level direct PCI config space access via MMCONFIG + */ + +#include +#include +#include "pci.h" + +/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ +u32 pci_mmcfg_base_addr; + +#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG)) + +/* The base address of the last MMCONFIG device accessed */ +static u32 mmcfg_last_accessed_device; + +/* + * Functions for accessing PCI configuration space with MMCONFIG accesses + */ + +static inline void pci_exp_set_dev_base(int bus, int devfn) +{ + u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12); + if (dev_base != mmcfg_last_accessed_device) { + mmcfg_last_accessed_device = dev_base; + set_fixmap(FIX_PCIE_MCFG, dev_base); + } +} + +static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(bus, devfn); + + switch (len) { + case 1: + *value = readb(mmcfg_virt_addr + reg); + break; + case 2: + *value = readw(mmcfg_virt_addr + reg); + break; + case 4: + *value = readl(mmcfg_virt_addr + reg); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + + if ((bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(bus, devfn); + + switch (len) { + case 1: + writeb(value, mmcfg_virt_addr + reg); + break; + case 2: + writew(value, mmcfg_virt_addr + reg); + break; + case 4: + writel(value, mmcfg_virt_addr + reg); + break; + } + + /* Dummy read to flush PCI write */ + readl(mmcfg_virt_addr); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static struct pci_raw_ops pci_mmcfg = { + .read = pci_mmcfg_read, + .write = pci_mmcfg_write, +}; + +static int __init pci_mmcfg_init(void) +{ + struct resource *region; + + if ((pci_probe & PCI_PROBE_MMCONF) == 0) + goto out; + if (!pci_mmcfg_base_addr) + goto out; + region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024, + "PCI MMCONFIG"); + if (!region) + goto out; + + printk(KERN_INFO "PCI: Using MMCONFIG\n"); + raw_pci_ops = &pci_mmcfg; + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; + + out: + return 0; +} + +arch_initcall(pci_mmcfg_init); diff -puN arch/i386/pci/pci.h~pcix-enhanced arch/i386/pci/pci.h --- 25/arch/i386/pci/pci.h~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/arch/i386/pci/pci.h 2004-01-29 08:56:58.000000000 -0800 @@ -15,6 +15,9 @@ #define PCI_PROBE_BIOS 0x0001 #define PCI_PROBE_CONF1 0x0002 #define PCI_PROBE_CONF2 0x0004 +#define PCI_PROBE_MMCONF 0x0008 +#define PCI_PROBE_MASK 0x000f + #define PCI_NO_SORT 0x0100 #define PCI_BIOS_SORT 0x0200 #define PCI_NO_CHECKS 0x0400 diff -puN drivers/acpi/tables.c~pcix-enhanced drivers/acpi/tables.c --- 25/drivers/acpi/tables.c~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/drivers/acpi/tables.c 2004-01-29 08:56:58.000000000 -0800 @@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_ [ACPI_SSDT] = "SSDT", [ACPI_SPMI] = "SPMI", [ACPI_HPET] = "HPET", + [ACPI_MCFG] = "MCFG", }; static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" }; diff -puN drivers/pci/pci.c~pcix-enhanced drivers/pci/pci.c --- 25/drivers/pci/pci.c~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/drivers/pci/pci.c 2004-01-29 08:56:58.000000000 -0800 @@ -90,6 +90,8 @@ pci_max_busnr(void) * %PCI_CAP_ID_CHSWP CompactPCI HotSwap * * %PCI_CAP_ID_PCIX PCI-X + * + * %PCI_CAP_ID_EXP PCI Express */ int pci_find_capability(struct pci_dev *dev, int cap) diff -puN drivers/pci/pci-sysfs.c~pcix-enhanced drivers/pci/pci-sysfs.c --- 25/drivers/pci/pci-sysfs.c~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/drivers/pci/pci-sysfs.c 2004-01-29 08:56:58.000000000 -0800 @@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch /* Several chips lock up trying to read undefined config space */ if (capable(CAP_SYS_ADMIN)) { - size = 256; + size = dev->cfg_size; } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { size = 128; } @@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c unsigned int size = count; loff_t init_off = off; - if (off > 256) + if (off > dev->cfg_size) return 0; - if (off + count > 256) { - size = 256 - off; + if (off + count > dev->cfg_size) { + size = dev->cfg_size - off; count = size; } @@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a .write = pci_write_config, }; +static struct bin_attribute pcie_config_attr = { + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, + }, + .size = 4096, + .read = pci_read_config, + .write = pci_write_config, +}; + void pci_create_sysfs_dev_files (struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct device_create_file (dev, &dev_attr_class); device_create_file (dev, &dev_attr_irq); device_create_file (dev, &dev_attr_resource); - sysfs_create_bin_file(&dev->kobj, &pci_config_attr); + if (pdev->cfg_size < 4096) { + sysfs_create_bin_file(&dev->kobj, &pci_config_attr); + } else { + sysfs_create_bin_file(&dev->kobj, &pcie_config_attr); + } } diff -puN drivers/pci/probe.c~pcix-enhanced drivers/pci/probe.c --- 25/drivers/pci/probe.c~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/drivers/pci/probe.c 2004-01-29 08:56:58.000000000 -0800 @@ -17,6 +17,8 @@ #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ #define CARDBUS_RESERVE_BUSNR 3 +#define PCI_CFG_SPACE_SIZE 256 +#define PCI_CFG_SPACE_EXP_SIZE 4096 /* Ugh. Need to stop exporting this to modules. */ LIST_HEAD(pci_root_buses); @@ -479,6 +481,20 @@ static void pci_release_dev(struct devic kfree(pci_dev); } +/** + * pci_cfg_space_size - get the configuration space size of the PCI device + */ +static int pci_cfg_space_size(struct pci_dev *dev) +{ +#ifdef CONFIG_PCI_MMCONFIG + /* Find whether the device is PCI Express */ + int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (is_pci_express_dev) + return PCI_CFG_SPACE_EXP_SIZE; +#endif + return PCI_CFG_SPACE_SIZE; +} + /* * Read the config data for a PCI device, sanity-check it * and fill in the dev structure... @@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int dev->multifunction = !!(hdr_type & 0x80); dev->vendor = l & 0xffff; dev->device = (l >> 16) & 0xffff; + dev->cfg_size = pci_cfg_space_size(dev); /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) set this higher, assuming the system even supports it. */ diff -puN drivers/pci/proc.c~pcix-enhanced drivers/pci/proc.c --- 25/drivers/pci/proc.c~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/drivers/pci/proc.c 2004-01-29 08:56:58.000000000 -0800 @@ -16,16 +16,15 @@ #include #include -#define PCI_CFG_SPACE_SIZE 256 - static int proc_initialized; /* = 0 */ static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence) { loff_t new = -1; + struct inode *inode = file->f_dentry->d_inode; - down(&file->f_dentry->d_inode->i_sem); + down(&inode->i_sem); switch (whence) { case 0: new = off; @@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo new = file->f_pos + off; break; case 2: - new = PCI_CFG_SPACE_SIZE + off; + new = inode->i_size + off; break; } - if (new < 0 || new > PCI_CFG_SPACE_SIZE) + if (new < 0 || new > inode->i_size) new = -EINVAL; else file->f_pos = new; - up(&file->f_dentry->d_inode->i_sem); + up(&inode->i_sem); return new; } @@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha */ if (capable(CAP_SYS_ADMIN)) - size = PCI_CFG_SPACE_SIZE; + size = dev->cfg_size; else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) size = 128; else @@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co const struct proc_dir_entry *dp = PDE(ino); struct pci_dev *dev = dp->data; int pos = *ppos; + int size = dev->cfg_size; int cnt; - if (pos >= PCI_CFG_SPACE_SIZE) + if (pos >= size) return 0; - if (nbytes >= PCI_CFG_SPACE_SIZE) - nbytes = PCI_CFG_SPACE_SIZE; - if (pos + nbytes > PCI_CFG_SPACE_SIZE) - nbytes = PCI_CFG_SPACE_SIZE - pos; + if (nbytes >= size) + nbytes = size; + if (pos + nbytes > size) + nbytes = size - pos; cnt = nbytes; if (!access_ok(VERIFY_READ, buf, cnt)) @@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de return -ENOMEM; e->proc_fops = &proc_bus_pci_operations; e->data = dev; - e->size = PCI_CFG_SPACE_SIZE; + e->size = dev->cfg_size; return 0; } diff -puN include/asm-i386/fixmap.h~pcix-enhanced include/asm-i386/fixmap.h --- 25/include/asm-i386/fixmap.h~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/include/asm-i386/fixmap.h 2004-01-29 08:56:58.000000000 -0800 @@ -71,6 +71,9 @@ enum fixed_addresses { FIX_ACPI_BEGIN, FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, #endif +#ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, +#endif __end_of_permanent_fixed_addresses, /* temporary boot-time mappings, used before ioremap() is functional */ #define NR_FIX_BTMAPS 16 diff -puN include/linux/acpi.h~pcix-enhanced include/linux/acpi.h --- 25/include/linux/acpi.h~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/include/linux/acpi.h 2004-01-29 08:56:58.000000000 -0800 @@ -317,6 +317,15 @@ struct acpi_table_ecdt { char ec_id[0]; } __attribute__ ((packed)); +/* PCI MMCONFIG */ + +struct acpi_table_mcfg { + struct acpi_table_header header; + u8 reserved[8]; + u32 base_address; + u32 base_reserved; +} __attribute__ ((packed)); + /* Table Handlers */ enum acpi_table_id { @@ -338,6 +347,7 @@ enum acpi_table_id { ACPI_SSDT, ACPI_SPMI, ACPI_HPET, + ACPI_MCFG, ACPI_TABLE_COUNT }; @@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void); extern int acpi_mp_config; +extern u32 pci_mmcfg_base_addr; + #else /*!CONFIG_ACPI_BOOT*/ #define acpi_mp_config 0 diff -puN include/linux/pci.h~pcix-enhanced include/linux/pci.h --- 25/include/linux/pci.h~pcix-enhanced 2004-01-29 08:56:58.000000000 -0800 +++ 25-akpm/include/linux/pci.h 2004-01-29 08:56:58.000000000 -0800 @@ -410,6 +410,8 @@ struct pci_dev { unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE]; unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE]; + int cfg_size; /* Size of configuration space */ + /* * Instead of touching interrupt line and base address registers * directly, use the values stored here. They might be different! _