x86, apic: move APIC drivers to arch/x86/kernel/apic/*
author		Ingo Molnar <mingo@elte.hu>
		Tue, 17 Feb 2009 17:09:24 +0000 (18:09 +0100)
committer	Ingo Molnar <mingo@elte.hu>
		Tue, 17 Feb 2009 17:17:36 +0000 (18:17 +0100)
arch/x86/kernel/ is getting a bit crowded, and the APIC
drivers are scattered across various files.

Move them to arch/x86/kernel/apic/*, and also remove
the 'gen' prefix from those which had it.

Also move APIC-related functionality: the IO-APIC driver,
the NMI code and the IPI code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
20 files changed:
arch/x86/kernel/Makefile
arch/x86/kernel/apic.c [deleted file]
arch/x86/kernel/apic/Makefile [new file with mode: 0644]
arch/x86/kernel/apic/apic.c [new file with mode: 0644]
arch/x86/kernel/apic/apic_64.c [new file with mode: 0644]
arch/x86/kernel/apic/apic_flat_64.c [new file with mode: 0644]
arch/x86/kernel/apic/io_apic.c [new file with mode: 0644]
arch/x86/kernel/apic/ipi.c [new file with mode: 0644]
arch/x86/kernel/apic/nmi.c [new file with mode: 0644]
arch/x86/kernel/apic/x2apic_cluster.c [new file with mode: 0644]
arch/x86/kernel/apic/x2apic_phys.c [new file with mode: 0644]
arch/x86/kernel/apic/x2apic_uv_x.c [new file with mode: 0644]
arch/x86/kernel/genapic_64.c [deleted file]
arch/x86/kernel/genapic_flat_64.c [deleted file]
arch/x86/kernel/genx2apic_cluster.c [deleted file]
arch/x86/kernel/genx2apic_phys.c [deleted file]
arch/x86/kernel/genx2apic_uv_x.c [deleted file]
arch/x86/kernel/io_apic.c [deleted file]
arch/x86/kernel/ipi.c [deleted file]
arch/x86/kernel/nmi.c [deleted file]

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1cefd218f764093d46d8200b360b915c548c76c6..9139ff69471c5a10ad232c65c93dcecd1a524711 100644
@@ -58,13 +58,12 @@ obj-$(CONFIG_PCI)           += early-quirks.o
 apm-y                          := apm_32.o
 obj-$(CONFIG_APM)              += apm.o
 obj-$(CONFIG_SMP)              += smp.o
-obj-$(CONFIG_SMP)              += smpboot.o tsc_sync.o ipi.o
+obj-$(CONFIG_SMP)              += smpboot.o tsc_sync.o
 obj-$(CONFIG_SMP)              += setup_percpu.o
 obj-$(CONFIG_X86_64_SMP)       += tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)      += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o nmi.o ipi.o
-obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
+obj-$(CONFIG_X86_LOCAL_APIC)   += apic/
 obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)   += ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)    += ftrace.o
@@ -116,17 +115,13 @@ obj-$(CONFIG_SWIOTLB)                     += pci-swiotlb_64.o # NB rename without _64
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y                          += genapic_64.o genapic_flat_64.o
-        obj-$(CONFIG_X86_X2APIC)       += genx2apic_cluster.o
-        obj-$(CONFIG_X86_X2APIC)       += genx2apic_phys.o
-       obj-$(CONFIG_X86_UV)            += genx2apic_uv_x.o tlb_uv.o
-       obj-$(CONFIG_X86_UV)            += bios_uv.o uv_irq.o uv_sysfs.o
-        obj-$(CONFIG_X86_PM_TIMER)     += pmtimer_64.o
-        obj-$(CONFIG_AUDIT)            += audit_64.o
-
-        obj-$(CONFIG_GART_IOMMU)       += pci-gart_64.o aperture_64.o
-        obj-$(CONFIG_CALGARY_IOMMU)    += pci-calgary_64.o tce_64.o
-        obj-$(CONFIG_AMD_IOMMU)                += amd_iommu_init.o amd_iommu.o
-
-        obj-$(CONFIG_PCI_MMCONFIG)     += mmconf-fam10h_64.o
+       obj-$(CONFIG_X86_UV)            += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
+       obj-$(CONFIG_X86_PM_TIMER)      += pmtimer_64.o
+       obj-$(CONFIG_AUDIT)             += audit_64.o
+
+       obj-$(CONFIG_GART_IOMMU)        += pci-gart_64.o aperture_64.o
+       obj-$(CONFIG_CALGARY_IOMMU)     += pci-calgary_64.o tce_64.o
+       obj-$(CONFIG_AMD_IOMMU)         += amd_iommu_init.o amd_iommu.o
+
+       obj-$(CONFIG_PCI_MMCONFIG)      += mmconf-fam10h_64.o
 endif
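
The new single-line rule "obj-$(CONFIG_X86_LOCAL_APIC) += apic/" works because
kbuild descends into a listed subdirectory and processes the Makefile it finds
there. As a sketch of what the new arch/x86/kernel/apic/Makefile plausibly
contains (inferred from the entries removed above and from the [new file]
list; the actual contents live in the new file, which this page does not
expand):

    # arch/x86/kernel/apic/Makefile -- sketch, inferred from this commit
    obj-$(CONFIG_X86_LOCAL_APIC)    += apic.o ipi.o nmi.o
    obj-$(CONFIG_X86_IO_APIC)       += io_apic.o
    obj-$(CONFIG_SMP)               += ipi.o

    ifeq ($(CONFIG_X86_64),y)
    obj-y                           += apic_64.o apic_flat_64.o
    obj-$(CONFIG_X86_X2APIC)        += x2apic_cluster.o x2apic_phys.o
    obj-$(CONFIG_X86_UV)            += x2apic_uv_x.o
    endif
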
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
deleted file mode 100644
index c12823e..0000000
--- a/arch/x86/kernel/apic.c
+++ /dev/null
@@ -1,2212 +0,0 @@
-/*
- *     Local APIC handling, local APIC timers
- *
- *     (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
- *
- *     Fixes
- *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
- *                                     thanks to Eric Gilmore
- *                                     and Rolf G. Tews
- *                                     for testing these extensively.
- *     Maciej W. Rozycki       :       Various updates and fixes.
- *     Mikael Pettersson       :       Power Management for UP-APIC.
- *     Pavel Machek and
- *     Mikael Pettersson       :       PM converted to driver model.
- */
-
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/acpi_pmtmr.h>
-#include <linux/clockchips.h>
-#include <linux/interrupt.h>
-#include <linux/bootmem.h>
-#include <linux/ftrace.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/delay.h>
-#include <linux/timex.h>
-#include <linux/dmar.h>
-#include <linux/init.h>
-#include <linux/cpu.h>
-#include <linux/dmi.h>
-#include <linux/nmi.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm/arch_hooks.h>
-#include <asm/pgalloc.h>
-#include <asm/atomic.h>
-#include <asm/mpspec.h>
-#include <asm/i8253.h>
-#include <asm/i8259.h>
-#include <asm/proto.h>
-#include <asm/apic.h>
-#include <asm/desc.h>
-#include <asm/hpet.h>
-#include <asm/idle.h>
-#include <asm/mtrr.h>
-#include <asm/smp.h>
-
-unsigned int num_processors;
-
-unsigned disabled_cpus __cpuinitdata;
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
-
-/*
- * The highest APIC ID seen during enumeration.
- *
- * This determines the messaging protocol we can use: if all APIC IDs
- * are in the 0 ... 7 range, then we can use logical addressing which
- * has some performance advantages (better broadcasting).
- *
- * If there's an APIC ID of 8 or above, we use physical addressing.
- */
-unsigned int max_physical_apicid;
-
-/*
- * Bitmask of physically existing CPUs:
- */
-physid_mask_t phys_cpu_present_map;
-
-/*
- * Map cpu index to physical APIC ID
- */
-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
-#ifdef CONFIG_X86_32
-/*
- * Knob to control our willingness to enable the local APIC.
- *
- * +1=force-enable
- */
-static int force_enable_local_apic;
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
-       force_enable_local_apic = 1;
-       return 0;
-}
-early_param("lapic", parse_lapic);
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
-#endif
-
-#ifdef CONFIG_X86_64
-static int apic_calibrate_pmtmr __initdata;
-static __init int setup_apicpmtimer(char *s)
-{
-       apic_calibrate_pmtmr = 1;
-       notsc_setup(NULL);
-       return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-#endif
-
-#ifdef CONFIG_X86_X2APIC
-int x2apic;
-/* x2apic enabled before OS handover */
-static int x2apic_preenabled;
-static int disable_x2apic;
-static __init int setup_nox2apic(char *str)
-{
-       disable_x2apic = 1;
-       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-       return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-#endif
-
-unsigned long mp_lapic_addr;
-int disable_apic;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-int first_system_vector = 0xfe;
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-int pic_mode;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
-       .name = "Local APIC",
-       .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
-                           struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-                             struct clock_event_device *evt);
-static void lapic_timer_broadcast(const struct cpumask *mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-       .name           = "lapic",
-       .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-                       | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-       .shift          = 32,
-       .set_mode       = lapic_timer_setup,
-       .set_next_event = lapic_next_event,
-       .broadcast      = lapic_timer_broadcast,
-       .rating         = 100,
-       .irq            = -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
-       return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check whether the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
-       return 1;
-#else
-       return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
-       /* AMD systems use old APIC versions, so check the CPU */
-       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-           boot_cpu_data.x86 >= 0xf)
-               return 1;
-       return lapic_get_version() >= 0x14;
-}
-
-void native_apic_wait_icr_idle(void)
-{
-       while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-               cpu_relax();
-}
-
-u32 native_safe_apic_wait_icr_idle(void)
-{
-       u32 send_status;
-       int timeout;
-
-       timeout = 0;
-       do {
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-               if (!send_status)
-                       break;
-               udelay(100);
-       } while (timeout++ < 1000);
-
-       return send_status;
-}
-
-void native_apic_icr_write(u32 low, u32 id)
-{
-       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-       apic_write(APIC_ICR, low);
-}
-
-u64 native_apic_icr_read(void)
-{
-       u32 icr1, icr2;
-
-       icr2 = apic_read(APIC_ICR2);
-       icr1 = apic_read(APIC_ICR);
-
-       return icr1 | ((u64)icr2 << 32);
-}
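
The ICR is architecturally a single 64-bit register exposed as two 32-bit
halves (APIC_ICR and APIC_ICR2), with the destination in the high word; the
high word is written first because writing the low word is what triggers
delivery. A standalone sketch of that split, using plain variables in place
of the memory-mapped registers (the helper names and sample values here are
illustrative, not kernel API):

    #include <stdint.h>
    #include <stdio.h>

    /* stand-ins for the two memory-mapped ICR halves */
    static uint32_t icr_lo, icr_hi;

    static void icr_write(uint32_t low, uint32_t dest_id)
    {
            icr_hi = dest_id << 24; /* xAPIC destination field: ICR bits 56..63 */
            icr_lo = low;           /* writing the low half sends the IPI */
    }

    static uint64_t icr_read(void)
    {
            return (uint64_t)icr_hi << 32 | icr_lo;
    }

    int main(void)
    {
            icr_write(0x4500, 3);   /* e.g. an INIT-style low word, APIC ID 3 */
            printf("ICR = 0x%016llx\n", (unsigned long long)icr_read());
            return 0;
    }
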
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-       unsigned int v;
-
-       /* unmask and set to NMI */
-       v = APIC_DM_NMI;
-
-       /* Level triggered for 82489DX (32bit mode) */
-       if (!lapic_is_integrated())
-               v |= APIC_LVT_LEVEL_TRIGGER;
-
-       apic_write(APIC_LVT0, v);
-}
-
-#ifdef CONFIG_X86_32
-/**
- * get_physical_broadcast - Get number of physical broadcast IDs
- */
-int get_physical_broadcast(void)
-{
-       return modern_apic() ? 0xff : 0xf;
-}
-#endif
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
-       unsigned int v;
-
-       v = apic_read(APIC_LVR);
-       /*
-        * - we always have APIC integrated on 64bit mode
-        * - 82489DXs do not report # of LVT entries
-        */
-       return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#define APIC_DIVISOR 16
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clocks. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, and the second time for real. The other (non-calibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-       unsigned int lvtt_value, tmp_value;
-
-       lvtt_value = LOCAL_TIMER_VECTOR;
-       if (!oneshot)
-               lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-       if (!lapic_is_integrated())
-               lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
-       if (!irqen)
-               lvtt_value |= APIC_LVT_MASKED;
-
-       apic_write(APIC_LVTT, lvtt_value);
-
-       /*
-        * Divide PICLK by 16
-        */
-       tmp_value = apic_read(APIC_TDCR);
-       apic_write(APIC_TDCR,
-               (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-               APIC_TDR_DIV_16);
-
-       if (!oneshot)
-               apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
-       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-       apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
-                           struct clock_event_device *evt)
-{
-       apic_write(APIC_TMICT, delta);
-       return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
-                             struct clock_event_device *evt)
-{
-       unsigned long flags;
-       unsigned int v;
-
-       /* Lapic used as dummy for broadcast ? */
-       if (evt->features & CLOCK_EVT_FEAT_DUMMY)
-               return;
-
-       local_irq_save(flags);
-
-       switch (mode) {
-       case CLOCK_EVT_MODE_PERIODIC:
-       case CLOCK_EVT_MODE_ONESHOT:
-               __setup_APIC_LVTT(calibration_result,
-                                 mode != CLOCK_EVT_MODE_PERIODIC, 1);
-               break;
-       case CLOCK_EVT_MODE_UNUSED:
-       case CLOCK_EVT_MODE_SHUTDOWN:
-               v = apic_read(APIC_LVTT);
-               v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-               apic_write(APIC_LVTT, v);
-               apic_write(APIC_TMICT, 0xffffffff);
-               break;
-       case CLOCK_EVT_MODE_RESUME:
-               /* Nothing to do here */
-               break;
-       }
-
-       local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(const struct cpumask *mask)
-{
-#ifdef CONFIG_SMP
-       apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Set up the local APIC timer for this CPU. Copy the initialized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void __cpuinit setup_APIC_timer(void)
-{
-       struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-       memcpy(levt, &lapic_clockevent, sizeof(*levt));
-       levt->cpumask = cpumask_of(smp_processor_id());
-
-       clockevents_register_device(levt);
-}
-
-/*
- * In this function we calibrate the APIC bus clocks to the external timer.
- *
- * We want to do the calibration only once, since we want to have local
- * timer irqs in sync. CPUs connected by the same APIC bus have the very
- * same bus frequency.
- *
- * This was previously done by reading the PIT/HPET and waiting for a wrap
- * around to find out that a tick has elapsed. I have a box where the PIT
- * readout is broken, so it never gets out of the wait loop again. This was
- * also reported by others.
- *
- * Monitoring the jiffies value is inaccurate and the clockevents
- * infrastructure allows us to do a simple substitution of the interrupt
- * handler.
- *
- * The calibration routine also uses the pm_timer when possible, as the PIT
- * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
- * back to normal later in the boot process).
- */
-
-#define LAPIC_CAL_LOOPS                (HZ/10)
-
-static __initdata int lapic_cal_loops = -1;
-static __initdata long lapic_cal_t1, lapic_cal_t2;
-static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
-static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
-static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
-
-/*
- * Temporary interrupt handler.
- */
-static void __init lapic_cal_handler(struct clock_event_device *dev)
-{
-       unsigned long long tsc = 0;
-       long tapic = apic_read(APIC_TMCCT);
-       unsigned long pm = acpi_pm_read_early();
-
-       if (cpu_has_tsc)
-               rdtscll(tsc);
-
-       switch (lapic_cal_loops++) {
-       case 0:
-               lapic_cal_t1 = tapic;
-               lapic_cal_tsc1 = tsc;
-               lapic_cal_pm1 = pm;
-               lapic_cal_j1 = jiffies;
-               break;
-
-       case LAPIC_CAL_LOOPS:
-               lapic_cal_t2 = tapic;
-               lapic_cal_tsc2 = tsc;
-               if (pm < lapic_cal_pm1)
-                       pm += ACPI_PM_OVRRUN;
-               lapic_cal_pm2 = pm;
-               lapic_cal_j2 = jiffies;
-               break;
-       }
-}
-
-static int __init
-calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
-{
-       const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
-       const long pm_thresh = pm_100ms / 100;
-       unsigned long mult;
-       u64 res;
-
-#ifndef CONFIG_X86_PM_TIMER
-       return -1;
-#endif
-
-       apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
-
-       /* Check if the PM timer is available */
-       if (!deltapm)
-               return -1;
-
-       mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-       if (deltapm > (pm_100ms - pm_thresh) &&
-           deltapm < (pm_100ms + pm_thresh)) {
-               apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
-               return 0;
-       }
-
-       res = (((u64)deltapm) *  mult) >> 22;
-       do_div(res, 1000000);
-       pr_warning("APIC calibration not consistent "
-                  "with PM-Timer: %ldms instead of 100ms\n",(long)res);
-
-       /* Correct the lapic counter value */
-       res = (((u64)(*delta)) * pm_100ms);
-       do_div(res, deltapm);
-       pr_info("APIC delta adjusted to PM-Timer: "
-               "%lu (%ld)\n", (unsigned long)res, *delta);
-       *delta = (long)res;
-
-       /* Correct the tsc counter value */
-       if (cpu_has_tsc) {
-               res = (((u64)(*deltatsc)) * pm_100ms);
-               do_div(res, deltapm);
-               apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
-                                         "PM-Timer: %lu (%ld)\n",
-                                       (unsigned long)res, *deltatsc);
-               *deltatsc = (long)res;
-       }
-
-       return 0;
-}
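
To make the correction above concrete: pm_100ms is the number of ACPI PM
timer ticks expected in 100 ms at its nominal 3.579545 MHz rate, and the
lapic delta is rescaled by the ratio of expected to observed PM ticks. A
small userspace sketch of the same arithmetic (all numbers hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    #define PMTMR_TICKS_PER_SEC 3579545 /* ACPI PM timer: 3.579545 MHz */

    int main(void)
    {
            const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; /* 357954 ticks */
            long deltapm = 830000;  /* hypothetical: observed PM timer ticks */
            long delta   = 2000000; /* hypothetical raw lapic delta */

            /* milliseconds actually elapsed according to the PM timer */
            printf("PM-Timer says %lld ms elapsed instead of 100 ms\n",
                   (deltapm * 1000LL) / PMTMR_TICKS_PER_SEC);

            /* rescale the lapic delta to a true 100 ms window,
             * exactly as the code above does */
            delta = (long)(((int64_t)delta * pm_100ms) / deltapm);
            printf("lapic delta adjusted to PM-Timer: %ld\n", delta);
            return 0;
    }
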
-
-static int __init calibrate_APIC_clock(void)
-{
-       struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-       void (*real_handler)(struct clock_event_device *dev);
-       unsigned long deltaj;
-       long delta, deltatsc;
-       int pm_referenced = 0;
-
-       local_irq_disable();
-
-       /* Replace the global interrupt handler */
-       real_handler = global_clock_event->event_handler;
-       global_clock_event->event_handler = lapic_cal_handler;
-
-       /*
-        * Setup the APIC counter to maximum. There is no way the lapic
-        * can underflow in the 100ms detection time frame
-        */
-       __setup_APIC_LVTT(0xffffffff, 0, 0);
-
-       /* Let the interrupts run */
-       local_irq_enable();
-
-       while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-               cpu_relax();
-
-       local_irq_disable();
-
-       /* Restore the real event handler */
-       global_clock_event->event_handler = real_handler;
-
-       /* Build delta t1-t2 as apic timer counts down */
-       delta = lapic_cal_t1 - lapic_cal_t2;
-       apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
-
-       deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
-
-       /* we trust the PM based calibration if possible */
-       pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
-                                       &delta, &deltatsc);
-
-       /* Calculate the scaled math multiplication factor */
-       lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
-                                      lapic_clockevent.shift);
-       lapic_clockevent.max_delta_ns =
-               clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-       lapic_clockevent.min_delta_ns =
-               clockevent_delta2ns(0xF, &lapic_clockevent);
-
-       calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
-
-       apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
-       apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
-       apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-                   calibration_result);
-
-       if (cpu_has_tsc) {
-               apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-                           "%ld.%04ld MHz.\n",
-                           (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
-                           (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
-       }
-
-       apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-                   "%u.%04u MHz.\n",
-                   calibration_result / (1000000 / HZ),
-                   calibration_result % (1000000 / HZ));
-
-       /*
-        * Do a sanity check on the APIC calibration result
-        */
-       if (calibration_result < (1000000 / HZ)) {
-               local_irq_enable();
-               pr_warning("APIC frequency too slow, disabling apic timer\n");
-               return -1;
-       }
-
-       levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
-
-       /*
-        * PM timer calibration failed or was not turned on,
-        * so let's try APIC timer based calibration
-        */
-       if (!pm_referenced) {
-               apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
-
-               /*
-                * Setup the apic timer manually
-                */
-               levt->event_handler = lapic_cal_handler;
-               lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
-               lapic_cal_loops = -1;
-
-               /* Let the interrupts run */
-               local_irq_enable();
-
-               while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-                       cpu_relax();
-
-               /* Stop the lapic timer */
-               lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
-
-               /* Jiffies delta */
-               deltaj = lapic_cal_j2 - lapic_cal_j1;
-               apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
-
-               /* Check if the jiffies result is consistent */
-               if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
-                       apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
-               else
-                       levt->features |= CLOCK_EVT_FEAT_DUMMY;
-       } else
-               local_irq_enable();
-
-       if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-               pr_warning("APIC timer disabled due to verification failure\n");
-               return -1;
-       }
-
-       return 0;
-}
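
As a worked example of the result computed above: LAPIC_CAL_LOOPS is HZ/10
ticks, i.e. a 100 ms calibration window, and the APIC timer counts a
divided-by-16 bus clock, so calibration_result comes out in bus clocks per
tick. A minimal sketch with hypothetical numbers (HZ=250, 200 MHz bus):

    #include <stdio.h>

    #define HZ              250
    #define APIC_DIVISOR    16
    #define LAPIC_CAL_LOOPS (HZ / 10)

    int main(void)
    {
            /* hypothetical: the divided-by-16 timer counted down 1,250,000
             * over the 100 ms window, i.e. a 200 MHz bus clock */
            long delta = 1250000;
            unsigned int calibration_result =
                    (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;

            /* same arithmetic as the apic_printk() above */
            printf("%u bus clocks per tick -> %u.%04u MHz\n",
                   calibration_result,
                   calibration_result / (1000000 / HZ),
                   calibration_result % (1000000 / HZ));
            return 0;
    }
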
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
-       /*
-        * The local apic timer can be disabled via the kernel
-        * commandline or from the CPU detection code. Register the lapic
-        * timer as a dummy clock event source on SMP systems, so the
-        * broadcast mechanism is used. On UP systems simply ignore it.
-        */
-       if (disable_apic_timer) {
-               pr_info("Disabling APIC timer\n");
-               /* No broadcast on UP ! */
-               if (num_possible_cpus() > 1) {
-                       lapic_clockevent.mult = 1;
-                       setup_APIC_timer();
-               }
-               return;
-       }
-
-       apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-                   "calibrating APIC timer ...\n");
-
-       if (calibrate_APIC_clock()) {
-               /* No broadcast on UP ! */
-               if (num_possible_cpus() > 1)
-                       setup_APIC_timer();
-               return;
-       }
-
-       /*
-        * If nmi_watchdog is set to IO_APIC, we need the
-        * PIT/HPET going.  Otherwise register lapic as a dummy
-        * device.
-        */
-       if (nmi_watchdog != NMI_IO_APIC)
-               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-       else
-               pr_warning("APIC timer registered as dummy,"
-                       " due to nmi_watchdog=%d!\n", nmi_watchdog);
-
-       /* Setup the lapic or request the broadcast */
-       setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-       setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
-       int cpu = smp_processor_id();
-       struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-       /*
-        * Normally we should not be here until the LAPIC has been
-        * initialized, but in some cases (like kdump) it's possible that a
-        * pending LAPIC timer interrupt from the previous kernel's context
-        * is delivered in the new kernel the moment interrupts are enabled.
-        *
-        * Interrupts are enabled early and the LAPIC is set up much later,
-        * so it's possible that evt->event_handler is NULL when we get
-        * here. Check for event_handler being NULL and discard the
-        * interrupt as spurious.
-        */
-       if (!evt->event_handler) {
-               pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-               /* Switch it off */
-               lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-               return;
-       }
-
-       /*
-        * the NMI deadlock-detector uses this.
-        */
-       inc_irq_stat(apic_timer_irqs);
-
-       evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-       struct pt_regs *old_regs = set_irq_regs(regs);
-
-       /*
-        * NOTE! We'd better ACK the irq immediately,
-        * because timer handling can be slow.
-        */
-       ack_APIC_irq();
-       /*
-        * update_process_times() expects us to have done irq_enter().
-        * Besides, if we don't, timer interrupts ignore the global
-        * interrupt lock, which is the WrongThing (tm) to do.
-        */
-       exit_idle();
-       irq_enter();
-       local_apic_timer_interrupt();
-       irq_exit();
-
-       set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-       return -EINVAL;
-}
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called when a CPU is disabled, and before rebooting, so that the
- * state of the local APIC has no dangling leftovers. Also used to clean out
- * any BIOS leftovers during boot.
- */
-void clear_local_APIC(void)
-{
-       int maxlvt;
-       u32 v;
-
-       /* APIC hasn't been mapped yet */
-       if (!apic_phys)
-               return;
-
-       maxlvt = lapic_get_maxlvt();
-       /*
-        * Masking an LVT entry can trigger a local APIC error
-        * if the vector is zero. Mask LVTERR first to prevent this.
-        */
-       if (maxlvt >= 3) {
-               v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-               apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-       }
-       /*
-        * Careful: we have to set masks only first to deassert
-        * any level-triggered sources.
-        */
-       v = apic_read(APIC_LVTT);
-       apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT1);
-       apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
-       if (maxlvt >= 4) {
-               v = apic_read(APIC_LVTPC);
-               apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
-       }
-
-       /* let's not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5) {
-               v = apic_read(APIC_LVTTHMR);
-               apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-       }
-#endif
-       /*
-        * Clean APIC state for other OSs:
-        */
-       apic_write(APIC_LVTT, APIC_LVT_MASKED);
-       apic_write(APIC_LVT0, APIC_LVT_MASKED);
-       apic_write(APIC_LVT1, APIC_LVT_MASKED);
-       if (maxlvt >= 3)
-               apic_write(APIC_LVTERR, APIC_LVT_MASKED);
-       if (maxlvt >= 4)
-               apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
-       /* Integrated APIC (!82489DX) ? */
-       if (lapic_is_integrated()) {
-               if (maxlvt > 3)
-                       /* Clear ESR due to Pentium errata 3AP and 11AP */
-                       apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-       }
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
-       unsigned int value;
-
-       /* APIC hasn't been mapped yet */
-       if (!apic_phys)
-               return;
-
-       clear_local_APIC();
-
-       /*
-        * Disable APIC (implies clearing of registers
-        * for 82489DX!).
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_SPIV_APIC_ENABLED;
-       apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
-       /*
-        * When LAPIC was disabled by the BIOS and enabled by the kernel,
-        * restore the disabled state.
-        */
-       if (enabled_via_apicbase) {
-               unsigned int l, h;
-
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               l &= ~MSR_IA32_APICBASE_ENABLE;
-               wrmsr(MSR_IA32_APICBASE, l, h);
-       }
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default, shut it down before
- * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
- * not power off.  Additionally, clear all LVT entries before
- * disable_local_APIC for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
-       unsigned long flags;
-
-       if (!cpu_has_apic)
-               return;
-
-       local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
-       if (!enabled_via_apicbase)
-               clear_local_APIC();
-       else
-#endif
-               disable_local_APIC();
-
-       local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-       unsigned int reg0, reg1;
-
-       /*
-        * The version register is read-only in a real APIC.
-        */
-       reg0 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-       reg1 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-       /*
-        * The two version reads above should print the same
- * numbers.  If the second one is different, then we are
- * poking at a non-APIC.
-        */
-       if (reg1 != reg0)
-               return 0;
-
-       /*
- * Check if the version looks reasonable.
-        */
-       reg1 = GET_APIC_VERSION(reg0);
-       if (reg1 == 0x00 || reg1 == 0xff)
-               return 0;
-       reg1 = lapic_get_maxlvt();
-       if (reg1 < 0x02 || reg1 == 0xff)
-               return 0;
-
-       /*
-        * The ID register is read/write in a real APIC.
-        */
-       reg0 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-       apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
-       reg1 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-       apic_write(APIC_ID, reg0);
-       if (reg1 != (reg0 ^ apic->apic_id_mask))
-               return 0;
-
-       /*
-        * The next two are just to see if we have sane values.
-        * They're only really relevant if we're in Virtual Wire
-        * compatibility mode, but most boxes are these days.
-        */
-       reg0 = apic_read(APIC_LVT0);
-       apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-       reg1 = apic_read(APIC_LVT1);
-       apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-       return 1;
-}
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
-       /*
-        * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1.
-        * Not needed on AMD.
-        */
-       if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-               return;
-
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-
-       apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-       apic_write(APIC_ICR, APIC_DEST_ALLINC |
-                       APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-       unsigned int value;
-
-       /*
-        * Don't do the setup now if we have an SMP BIOS as the
-        * through-I/O-APIC virtual wire mode might be active.
-        */
-       if (smp_found_config || !cpu_has_apic)
-               return;
-
-       /*
-        * Do not trust the local APIC being empty at bootup.
-        */
-       clear_local_APIC();
-
-       /*
-        * Enable APIC.
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-       /* This bit is reserved on P4/Xeon and should be cleared */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-           (boot_cpu_data.x86 == 15))
-               value &= ~APIC_SPIV_FOCUS_DISABLED;
-       else
-#endif
-               value |= APIC_SPIV_FOCUS_DISABLED;
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write(APIC_SPIV, value);
-
-       /*
-        * Set up the virtual wire mode.
-        */
-       apic_write(APIC_LVT0, APIC_DM_EXTINT);
-       value = APIC_DM_NMI;
-       if (!lapic_is_integrated())             /* 82489DX */
-               value |= APIC_LVT_LEVEL_TRIGGER;
-       apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
-       unsigned int oldvalue, value, maxlvt;
-
-       if (!lapic_is_integrated()) {
-               pr_info("No ESR for 82489DX.\n");
-               return;
-       }
-
-       if (apic->disable_esr) {
-               /*
-                * Something untraceable is creating bad interrupts on
-                * secondary quads ... for the moment, just leave the
-                * ESR disabled - we can't do anything useful with the
-                * errors anyway - mbligh
-                */
-               pr_info("Leaving ESR disabled.\n");
-               return;
-       }
-
-       maxlvt = lapic_get_maxlvt();
-       if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-               apic_write(APIC_ESR, 0);
-       oldvalue = apic_read(APIC_ESR);
-
-       /* enables sending errors */
-       value = ERROR_APIC_VECTOR;
-       apic_write(APIC_LVTERR, value);
-
-       /*
-        * spec says clear errors after enabling vector.
-        */
-       if (maxlvt > 3)
-               apic_write(APIC_ESR, 0);
-       value = apic_read(APIC_ESR);
-       if (value != oldvalue)
-               apic_printk(APIC_VERBOSE, "ESR value before enabling "
-                       "vector: 0x%08x  after: 0x%08x\n",
-                       oldvalue, value);
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
-       unsigned int value;
-       int i, j;
-
-       if (disable_apic) {
-               arch_disable_smp_support();
-               return;
-       }
-
-#ifdef CONFIG_X86_32
-       /* Pound the ESR really hard over the head with a big hammer - mbligh */
-       if (lapic_is_integrated() && apic->disable_esr) {
-               apic_write(APIC_ESR, 0);
-               apic_write(APIC_ESR, 0);
-               apic_write(APIC_ESR, 0);
-               apic_write(APIC_ESR, 0);
-       }
-#endif
-
-       preempt_disable();
-
-       /*
-        * Double-check whether this APIC is really registered.
-        * This is meaningless in clustered apic mode, so we skip it.
-        */
-       if (!apic->apic_id_registered())
-               BUG();
-
-       /*
-        * Intel recommends to set DFR, LDR and TPR before enabling
-        * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-        * document number 292116).  So here it goes...
-        */
-       apic->init_apic_ldr();
-
-       /*
-        * Set Task Priority to 'accept all'. We never change this
-        * later on.
-        */
-       value = apic_read(APIC_TASKPRI);
-       value &= ~APIC_TPRI_MASK;
-       apic_write(APIC_TASKPRI, value);
-
-       /*
-        * After a crash, we no longer service the interrupts and a pending
-        * interrupt from previous kernel might still have ISR bit set.
-        *
-        * Most probably the CPU has serviced that pending interrupt by now,
-        * but it might not have done the ack_APIC_irq() because it thought
-        * the interrupt came from the i8259 as ExtInt. The LAPIC did not get
-        * an EOI, so it does not clear the ISR bit and the CPU thinks it has
-        * already serviced the interrupt. Hence a vector might get locked.
-        * This was noticed for the timer irq (vector 0x31). Issue an extra
-        * EOI to clear the ISR.
-        */
-       for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-               value = apic_read(APIC_ISR + i*0x10);
-               for (j = 31; j >= 0; j--) {
-                       if (value & (1<<j))
-                               ack_APIC_irq();
-               }
-       }
-
-       /*
-        * Now that we are all set up, enable the APIC
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       /*
-        * Enable APIC
-        */
-       value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-       /*
-        * Some unknown Intel IO/APIC (or APIC) erratum is biting us with
-        * certain networking cards. If high frequency interrupts are
-        * happening on a particular IOAPIC pin, plus the IOAPIC routing
-        * entry is masked/unmasked at a high rate as well then sooner or
-        * later IOAPIC line gets 'stuck', no more interrupts are received
-        * from the device. If focus CPU is disabled then the hang goes
-        * away, oh well :-(
-        *
-        * [ This bug can be reproduced easily with a level-triggered
-        *   PCI Ne2000 networking cards and PII/PIII processors, dual
-        *   BX chipset. ]
-        */
-       /*
-        * Actually disabling the focus CPU check just makes the hang less
-        * frequent, as it makes the interrupt distribution model more
-        * like LRU than MRU (the short-term load is more even across CPUs).
-        * See also the comment in end_level_ioapic_irq().  --macro
-        */
-
-       /*
-        * - enable focus processor (bit==0)
-        * - 64bit mode always use processor focus
-        *   so no need to set it
-        */
-       value &= ~APIC_SPIV_FOCUS_DISABLED;
-#endif
-
-       /*
-        * Set spurious IRQ vector
-        */
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write(APIC_SPIV, value);
-
-       /*
-        * Set up LVT0, LVT1:
-        *
-        * set up through-local-APIC on the BP's LINT0. This is not
-        * strictly necessary in pure symmetric-IO mode, but sometimes
-        * we delegate interrupts to the 8259A.
-        */
-       /*
-        * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-        */
-       value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-       if (!smp_processor_id() && (pic_mode || !value)) {
-               value = APIC_DM_EXTINT;
-               apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-                               smp_processor_id());
-       } else {
-               value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-               apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-                               smp_processor_id());
-       }
-       apic_write(APIC_LVT0, value);
-
-       /*
-        * only the BP should see the LINT1 NMI signal, obviously.
-        */
-       if (!smp_processor_id())
-               value = APIC_DM_NMI;
-       else
-               value = APIC_DM_NMI | APIC_LVT_MASKED;
-       if (!lapic_is_integrated())             /* 82489DX */
-               value |= APIC_LVT_LEVEL_TRIGGER;
-       apic_write(APIC_LVT1, value);
-
-       preempt_enable();
-}
-
-void __cpuinit end_local_APIC_setup(void)
-{
-       lapic_setup_esr();
-
-#ifdef CONFIG_X86_32
-       {
-               unsigned int value;
-               /* Disable the local apic timer */
-               value = apic_read(APIC_LVTT);
-               value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-               apic_write(APIC_LVTT, value);
-       }
-#endif
-
-       setup_apic_nmi_watchdog(NULL);
-       apic_pm_activate();
-}
-
-#ifdef CONFIG_X86_X2APIC
-void check_x2apic(void)
-{
-       int msr, msr2;
-
-       if (!cpu_has_x2apic)
-               return;
-
-       rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
-       if (msr & X2APIC_ENABLE) {
-               pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
-               x2apic_preenabled = x2apic = 1;
-       }
-}
-
-void enable_x2apic(void)
-{
-       int msr, msr2;
-
-       if (!x2apic)
-               return;
-
-       rdmsr(MSR_IA32_APICBASE, msr, msr2);
-       if (!(msr & X2APIC_ENABLE)) {
-               pr_info("Enabling x2apic\n");
-               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
-       }
-}
-
-void __init enable_IR_x2apic(void)
-{
-#ifdef CONFIG_INTR_REMAP
-       int ret;
-       unsigned long flags;
-
-       if (!cpu_has_x2apic)
-               return;
-
-       if (!x2apic_preenabled && disable_x2apic) {
-               pr_info("Skipped enabling x2apic and Interrupt-remapping "
-                       "because of nox2apic\n");
-               return;
-       }
-
-       if (x2apic_preenabled && disable_x2apic)
-               panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-       if (!x2apic_preenabled && skip_ioapic_setup) {
-               pr_info("Skipped enabling x2apic and Interrupt-remapping "
-                       "because of skipping io-apic setup\n");
-               return;
-       }
-
-       ret = dmar_table_init();
-       if (ret) {
-               pr_info("dmar_table_init() failed with %d:\n", ret);
-
-               if (x2apic_preenabled)
-                       panic("x2apic enabled by bios. But IR enabling failed");
-               else
-                       pr_info("Not enabling x2apic, Intr-remapping\n");
-               return;
-       }
-
-       local_irq_save(flags);
-       mask_8259A();
-
-       ret = save_mask_IO_APIC_setup();
-       if (ret) {
-               pr_info("Saving IO-APIC state failed: %d\n", ret);
-               goto end;
-       }
-
-       ret = enable_intr_remapping(1);
-
-       if (ret && x2apic_preenabled) {
-               local_irq_restore(flags);
-               panic("x2apic enabled by bios. But IR enabling failed");
-       }
-
-       if (ret)
-               goto end_restore;
-
-       if (!x2apic) {
-               x2apic = 1;
-               enable_x2apic();
-       }
-
-end_restore:
-       if (ret)
-               /*
-                * IR enabling failed
-                */
-               restore_IO_APIC_setup();
-       else
-               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
-
-end:
-       unmask_8259A();
-       local_irq_restore(flags);
-
-       if (!ret) {
-               if (!x2apic_preenabled)
-                       pr_info("Enabled x2apic and interrupt-remapping\n");
-               else
-                       pr_info("Enabled Interrupt-remapping\n");
-       } else
-               pr_err("Failed to enable Interrupt-remapping and x2apic\n");
-#else
-       if (!cpu_has_x2apic)
-               return;
-
-       if (x2apic_preenabled)
-               panic("x2apic enabled prior to OS handover,"
-                     " enable CONFIG_INTR_REMAP");
-
-       pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping"
-               " and x2apic\n");
-#endif
-
-       return;
-}
-#endif /* CONFIG_X86_X2APIC */
-
-#ifdef CONFIG_X86_64
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-static int __init detect_init_APIC(void)
-{
-       if (!cpu_has_apic) {
-               pr_info("No local APIC present\n");
-               return -1;
-       }
-
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-       boot_cpu_physical_apicid = 0;
-       return 0;
-}
-#else
-/*
- * Detect and initialize APIC
- */
-static int __init detect_init_APIC(void)
-{
-       u32 h, l, features;
-
-       /* Disabled by kernel option? */
-       if (disable_apic)
-               return -1;
-
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_AMD:
-               if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
-                   (boot_cpu_data.x86 >= 15))
-                       break;
-               goto no_apic;
-       case X86_VENDOR_INTEL:
-               if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-                   (boot_cpu_data.x86 == 5 && cpu_has_apic))
-                       break;
-               goto no_apic;
-       default:
-               goto no_apic;
-       }
-
-       if (!cpu_has_apic) {
-               /*
-                * Override the BIOS and try to enable the local APIC only
-                * if "lapic" was specified.
-                */
-               if (!force_enable_local_apic) {
-                       pr_info("Local APIC disabled by BIOS -- "
-                               "you can enable it with \"lapic\"\n");
-                       return -1;
-               }
-               /*
-                * Some BIOSes disable the local APIC in the APIC_BASE
-                * MSR. This can only be done in software for Intel P6 or later
-                * and AMD K7 (Model > 1) or later.
-                */
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-                       pr_info("Local APIC disabled by BIOS -- reenabling.\n");
-                       l &= ~MSR_IA32_APICBASE_BASE;
-                       l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-                       wrmsr(MSR_IA32_APICBASE, l, h);
-                       enabled_via_apicbase = 1;
-               }
-       }
-       /*
-        * The APIC feature bit should now be enabled
-        * in `cpuid'
-        */
-       features = cpuid_edx(1);
-       if (!(features & (1 << X86_FEATURE_APIC))) {
-               pr_warning("Could not enable APIC!\n");
-               return -1;
-       }
-       set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-       /* The BIOS may have set up the APIC at some other address */
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       if (l & MSR_IA32_APICBASE_ENABLE)
-               mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-       pr_info("Found and enabled local APIC!\n");
-
-       apic_pm_activate();
-
-       return 0;
-
-no_apic:
-       pr_info("No local APIC present or hardware disabled\n");
-       return -1;
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
-       unsigned long phys_addr;
-
-       /*
-        * If no local APIC can be found then bail out:
-        * it means there is no MP table and no MADT
-        */
-       if (!smp_found_config)
-               return;
-
-       phys_addr = mp_lapic_addr;
-
-       set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
-       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-                   APIC_BASE, phys_addr);
-
-       /*
-        * Fetch the APIC ID of the BSP in case we have a
-        * default configuration (or the MP table is broken).
-        */
-       boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
-/**
- * init_apic_mappings - initialize APIC mappings
- */
-void __init init_apic_mappings(void)
-{
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic) {
-               boot_cpu_physical_apicid = read_apic_id();
-               return;
-       }
-#endif
-
-       /*
-        * If no local APIC can be found then set up a fake all
-        * zeroes page to simulate the local APIC and another
-        * one for the IO-APIC.
-        */
-       if (!smp_found_config && detect_init_APIC()) {
-               apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-               apic_phys = __pa(apic_phys);
-       } else
-               apic_phys = mp_lapic_addr;
-
-       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-       apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
-                               APIC_BASE, apic_phys);
-
-       /*
-        * Fetch the APIC ID of the BSP in case we have a
-        * default configuration (or the MP table is broken).
-        */
-       if (boot_cpu_physical_apicid == -1U)
-               boot_cpu_physical_apicid = read_apic_id();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int apic_version[MAX_APICS];
-
-int __init APIC_init_uniprocessor(void)
-{
-       if (disable_apic) {
-               pr_info("Apic disabled\n");
-               return -1;
-       }
-#ifdef CONFIG_X86_64
-       if (!cpu_has_apic) {
-               disable_apic = 1;
-               pr_info("Apic disabled by BIOS\n");
-               return -1;
-       }
-#else
-       if (!smp_found_config && !cpu_has_apic)
-               return -1;
-
-       /*
-        * Complain if the BIOS pretends there is one.
-        */
-       if (!cpu_has_apic &&
-           APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-               pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
-                       boot_cpu_physical_apicid);
-               clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-               return -1;
-       }
-#endif
-
-       enable_IR_x2apic();
-#ifdef CONFIG_X86_64
-       default_setup_apic_routing();
-#endif
-
-       verify_local_APIC();
-       connect_bsp_APIC();
-
-#ifdef CONFIG_X86_64
-       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-#else
-       /*
-        * Hack: In case of kdump, after a crash, kernel might be booting
-        * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
-        * might be zero if read from MP tables. Get it from LAPIC.
-        */
-# ifdef CONFIG_CRASH_DUMP
-       boot_cpu_physical_apicid = read_apic_id();
-# endif
-#endif
-       physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-       setup_local_APIC();
-
-#ifdef CONFIG_X86_IO_APIC
-       /*
-        * Now enable IO-APICs; actually call clear_IO_APIC.
-        * We need clear_IO_APIC before enabling the error vector.
-        */
-       if (!skip_ioapic_setup && nr_ioapics)
-               enable_IO_APIC();
-#endif
-
-       end_local_APIC_setup();
-
-#ifdef CONFIG_X86_IO_APIC
-       if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-               setup_IO_APIC();
-       else {
-               nr_ioapics = 0;
-               localise_nmi_watchdog();
-       }
-#else
-       localise_nmi_watchdog();
-#endif
-
-       setup_boot_clock();
-#ifdef CONFIG_X86_64
-       check_nmi_watchdog();
-#endif
-
-       return 0;
-}
-
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-void smp_spurious_interrupt(struct pt_regs *regs)
-{
-       u32 v;
-
-       exit_idle();
-       irq_enter();
-       /*
-        * Check if this really is a spurious interrupt and ACK it
-        * if it is a vectored one.  Just in case...
-        * Spurious interrupts should not be ACKed.
-        */
-       v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-       if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-               ack_APIC_irq();
-
-       inc_irq_stat(irq_spurious_count);
-
-       /* see sw-dev-man vol 3, chapter 7.4.13.5 */
-       pr_info("spurious APIC interrupt on CPU#%d, "
-               "should never happen.\n", smp_processor_id());
-       irq_exit();
-}
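
The ISR lookup above encodes how the 256 vectors are spread across eight
32-bit ISR registers spaced 0x10 apart: (vector & ~0x1f) selects the
32-vector group and >> 1 turns it into a register offset. A quick standalone
check of that bit math (APIC_ISR is 0x100; the vector value is just an
example):

    #include <stdio.h>

    #define APIC_ISR 0x100  /* base offset of the eight ISR registers */

    int main(void)
    {
            unsigned int vector = 0xff; /* e.g. the spurious vector */

            /* (vector / 32) * 0x10 == ((vector & ~0x1f) >> 1) */
            unsigned int reg = APIC_ISR + ((vector & ~0x1f) >> 1);
            unsigned int bit = vector & 0x1f;

            printf("vector 0x%02x -> register offset 0x%03x, bit %u\n",
                   vector, reg, bit);
            return 0;
    }
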
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-void smp_error_interrupt(struct pt_regs *regs)
-{
-       u32 v, v1;
-
-       exit_idle();
-       irq_enter();
-       /* First tickle the hardware, only then report what went on. -- REW */
-       v = apic_read(APIC_ESR);
-       apic_write(APIC_ESR, 0);
-       v1 = apic_read(APIC_ESR);
-       ack_APIC_irq();
-       atomic_inc(&irq_err_count);
-
-       /*
-        * Here is what the APIC error bits mean:
-        * 0: Send CS error
-        * 1: Receive CS error
-        * 2: Send accept error
-        * 3: Receive accept error
-        * 4: Reserved
-        * 5: Send illegal vector
-        * 6: Received illegal vector
-        * 7: Illegal register address
-        */
-       pr_debug("APIC error on CPU%d: %02x(%02x)\n",
-               smp_processor_id(), v, v1);
-       irq_exit();
-}
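
For reference, a tiny decoder for the ESR bit meanings listed in the comment
above (a sketch; the sample value is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    static const char *esr_bits[8] = {
            "Send CS error",           "Receive CS error",
            "Send accept error",       "Receive accept error",
            "Reserved",                "Send illegal vector",
            "Received illegal vector", "Illegal register address",
    };

    /* print every error condition latched in an ESR value */
    static void decode_esr(uint32_t v)
    {
            for (int i = 0; i < 8; i++)
                    if (v & (1u << i))
                            printf("  bit %d: %s\n", i, esr_bits[i]);
    }

    int main(void)
    {
            decode_esr(0x44); /* hypothetical ESR: bits 2 and 6 set */
            return 0;
    }
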
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
-       if (pic_mode) {
-               /*
-                * Do not trust the local APIC being empty at bootup.
-                */
-               clear_local_APIC();
-               /*
-                * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
-                * local APIC to INT and NMI lines.
-                */
-               apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-                               "enabling APIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x01, 0x23);
-       }
-#endif
-       if (apic->enable_apic_mode)
-               apic->enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup:   indicates whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-       unsigned int value;
-
-#ifdef CONFIG_X86_32
-       if (pic_mode) {
-               /*
-                * Put the board back into PIC mode (has an effect only on
-                * certain older boards).  Note that APIC interrupts, including
-                * IPIs, won't work beyond this point!  The only exception are
-                * INIT IPIs.
-                */
-               apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-                               "entering PIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x00, 0x23);
-               return;
-       }
-#endif
-
-       /* Go back to Virtual Wire compatibility mode */
-
-       /* For the spurious interrupt use vector F, and enable it */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       value |= APIC_SPIV_APIC_ENABLED;
-       value |= 0xf;
-       apic_write(APIC_SPIV, value);
-
-       if (!virt_wire_setup) {
-               /*
-                * For LVT0 make it edge triggered, active high,
-                * external and enabled
-                */
-               value = apic_read(APIC_LVT0);
-               value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-               apic_write(APIC_LVT0, value);
-       } else {
-               /* Disable LVT0 */
-               apic_write(APIC_LVT0, APIC_LVT_MASKED);
-       }
-
-       /*
-        * For LVT1 make it edge triggered, active high,
-        * nmi and enabled
-        */
-       value = apic_read(APIC_LVT1);
-       value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-       value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-       value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-       apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
-       int cpu;
-
-       /*
-        * Validate version
-        */
-       if (version == 0x0) {
-               pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
-                          "fixing up to 0x10. (tell your hw vendor)\n",
-                               apicid);
-               version = 0x10;
-       }
-       apic_version[apicid] = version;
-
-       if (num_processors >= nr_cpu_ids) {
-               int max = nr_cpu_ids;
-               int thiscpu = max + disabled_cpus;
-
-               pr_warning(
-                       "ACPI: NR_CPUS/possible_cpus limit of %i reached."
-                       "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
-
-               disabled_cpus++;
-               return;
-       }
-
-       num_processors++;
-       cpu = cpumask_next_zero(-1, cpu_present_mask);
-
-       if (version != apic_version[boot_cpu_physical_apicid])
-               WARN_ONCE(1,
-                       "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
-                       apic_version[boot_cpu_physical_apicid], cpu, version);
-
-       physid_set(apicid, phys_cpu_present_map);
-       if (apicid == boot_cpu_physical_apicid) {
-               /*
-                * x86_bios_cpu_apicid is required to have processors listed
-                * in the same order as logical cpu numbers. Hence the first
-                * entry is BSP, and so on.
-                */
-               cpu = 0;
-       }
-       if (apicid > max_physical_apicid)
-               max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
-       /*
-        * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-        * but we need to work out other dependencies like SMP_SUSPEND etc.
-        * before this can be done without some confusion.
-        * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-        *       - Ashok Raj <ashok.raj@intel.com>
-        */
-       if (max_physical_apicid >= 8) {
-               switch (boot_cpu_data.x86_vendor) {
-               case X86_VENDOR_INTEL:
-                       if (!APIC_XAPIC(version)) {
-                               def_to_bigsmp = 0;
-                               break;
-                       }
-                       /* If P4 and above fall through */
-               case X86_VENDOR_AMD:
-                       def_to_bigsmp = 1;
-               }
-       }
-#endif
-
-#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
-       early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-       early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-#endif
-
-       set_cpu_possible(cpu, true);
-       set_cpu_present(cpu, true);
-}
-
-int hard_smp_processor_id(void)
-{
-       return read_apic_id();
-}
-
-void default_init_apic_ldr(void)
-{
-       unsigned long val;
-
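-       /*
-        * Flat logical mode: each CPU's logical APIC ID is a single bit
-        * (1 << cpu) in the 8-bit LDR field, so a logical destination
-        * mask can address at most eight CPUs.
-        */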
-       apic_write(APIC_DFR, APIC_DFR_VALUE);
-       val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
-       val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-       apic_write(APIC_LDR, val);
-}
-
-#ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
-       return apicid_2_node[hard_smp_processor_id()];
-#else
-       return 0;
-#endif
-}
-#endif
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
-       /*
-        * 'active' is true if the local APIC was enabled by us and
-        * not the BIOS; this signifies that we are also responsible
-        * for disabling it before entering apm/acpi suspend
-        */
-       int active;
-       /* r/w apic fields */
-       unsigned int apic_id;
-       unsigned int apic_taskpri;
-       unsigned int apic_ldr;
-       unsigned int apic_dfr;
-       unsigned int apic_spiv;
-       unsigned int apic_lvtt;
-       unsigned int apic_lvtpc;
-       unsigned int apic_lvt0;
-       unsigned int apic_lvt1;
-       unsigned int apic_lvterr;
-       unsigned int apic_tmict;
-       unsigned int apic_tdcr;
-       unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-       unsigned long flags;
-       int maxlvt;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       maxlvt = lapic_get_maxlvt();
-
-       apic_pm_state.apic_id = apic_read(APIC_ID);
-       apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-       apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-       apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-       apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-       apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-       if (maxlvt >= 4)
-               apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-       apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-       apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-       apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-       apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-       apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5)
-               apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
-       local_irq_save(flags);
-       disable_local_APIC();
-       local_irq_restore(flags);
-       return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-       unsigned int l, h;
-       unsigned long flags;
-       int maxlvt;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       maxlvt = lapic_get_maxlvt();
-
-       local_irq_save(flags);
-
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic)
-               enable_x2apic();
-       else
-#endif
-       {
-               /*
-                * Make sure the APICBASE points to the right address
-                *
-                * FIXME! This will be wrong if we ever support suspend on
-                * SMP! We'll need to do this as part of the CPU restore!
-                */
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               l &= ~MSR_IA32_APICBASE_BASE;
-               l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-               wrmsr(MSR_IA32_APICBASE, l, h);
-       }
-
-       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-       apic_write(APIC_ID, apic_pm_state.apic_id);
-       apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-       apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-       apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-       apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-       apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-       apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5)
-               apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
-       if (maxlvt >= 4)
-               apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-       apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-       apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-       apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-       apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-
-       local_irq_restore(flags);
-
-       return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-       .name           = "lapic",
-       .resume         = lapic_resume,
-       .suspend        = lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-       .id     = 0,
-       .cls    = &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
-       apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-       int error;
-
-       if (!cpu_has_apic)
-               return 0;
-       /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-       error = sysdev_class_register(&lapic_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic);
-       return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else  /* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif /* CONFIG_PM */
-
-#ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
-       int i, clusters, zeros;
-       unsigned id;
-       u16 *bios_cpu_apicid;
-       DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-       /*
-        * There is no such kind of box with AMD CPUs yet.
-        * Some AMD boxes with quad-core CPUs and 8 sockets have
-        * apicids in [4, 0x23] or [8, 0x27]; they could be mistaken
-        * for vSMP boxes, so this still needs checking...
-        */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-               return 0;
-
-       bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-       bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-       for (i = 0; i < nr_cpu_ids; i++) {
-               /* are we being called early in kernel startup? */
-               if (bios_cpu_apicid) {
-                       id = bios_cpu_apicid[i];
-               } else if (i < nr_cpu_ids) {
-                       if (cpu_present(i))
-                               id = per_cpu(x86_bios_cpu_apicid, i);
-                       else
-                               continue;
-               } else
-                       break;
-
-               if (id != BAD_APICID)
-                       __set_bit(APIC_CLUSTERID(id), clustermap);
-       }
-
-       /* Problem:  Partially populated chassis may not have CPUs in some of
-        * the APIC clusters they have been allocated.  Only present CPUs have
-        * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-        * Since clusters are allocated sequentially, count zeros only if
-        * they are bounded by ones.
-        */
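-       /*
-        * Example: with only cluster bits 0 and 2 set, the loop below
-        * counts 1 for cluster 0, then 1 + 1 for the interior gap plus
-        * cluster 2, giving clusters == 3. Trailing zeros are never
-        * counted.
-        */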
-       clusters = 0;
-       zeros = 0;
-       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-               if (test_bit(i, clustermap)) {
-                       clusters += 1 + zeros;
-                       zeros = 0;
-               } else
-                       ++zeros;
-       }
-
-       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-        * not guaranteed to be synced between boards
-        */
-       if (is_vsmp_box() && clusters > 1)
-               return 1;
-
-       /*
-        * If clusters > 2, then it should be a multi-chassis box.
-        * May have to revisit this when multi-core + hyperthreaded CPUs come
-        * out, but AFAIK this will work even for them.
-        */
-       return (clusters > 2);
-}
-#endif
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
-       disable_apic = 1;
-       setup_clear_cpu_cap(X86_FEATURE_APIC);
-       return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
-       return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-       local_apic_timer_c2_ok = 1;
-       return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
-       disable_apic_timer = 1;
-       return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
-       disable_apic_timer = 1;
-       return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static int __init apic_set_verbosity(char *arg)
-{
-       if (!arg)  {
-#ifdef CONFIG_X86_64
-               skip_ioapic_setup = 0;
-               return 0;
-#endif
-               return -EINVAL;
-       }
-
-       if (strcmp("debug", arg) == 0)
-               apic_verbosity = APIC_DEBUG;
-       else if (strcmp("verbose", arg) == 0)
-               apic_verbosity = APIC_VERBOSE;
-       else {
-               pr_warning("APIC Verbosity level %s not recognised"
-                       " use apic=verbose or apic=debug\n", arg);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
-       if (!apic_phys)
-               return -1;
-
-       /* Put local APIC into the resource map. */
-       lapic_resource.start = apic_phys;
-       lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-       insert_resource(&iomem_resource, &lapic_resource);
-
-       return 0;
-}
-
-/*
- * We need to call insert_resource() after e820_reserve_resources(),
- * which uses request_resource()
- */
-late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
new file mode 100644 (file)
index 0000000..da20b70
--- /dev/null
@@ -0,0 +1,15 @@
+#
+# Makefile for local APIC drivers and for the IO-APIC code
+#
+
+obj-y                          := apic.o ipi.o nmi.o
+obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
+obj-$(CONFIG_SMP)              += ipi.o
+
+ifeq ($(CONFIG_X86_64),y)
+obj-y                          += apic_64.o apic_flat_64.o
+obj-$(CONFIG_X86_X2APIC)       += x2apic_cluster.o
+obj-$(CONFIG_X86_X2APIC)       += x2apic_phys.o
+obj-$(CONFIG_X86_UV)           += x2apic_uv_x.o
+endif
+
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
new file mode 100644 (file)
index 0000000..c12823e
--- /dev/null
@@ -0,0 +1,2212 @@
+/*
+ *     Local APIC handling, local APIC timers
+ *
+ *     (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively.
+ *     Maciej W. Rozycki       :       Various updates and fixes.
+ *     Mikael Pettersson       :       Power Management for UP-APIC.
+ *     Pavel Machek and
+ *     Mikael Pettersson       :       PM converted to driver model.
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/acpi_pmtmr.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/bootmem.h>
+#include <linux/ftrace.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/dmar.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/dmi.h>
+#include <linux/nmi.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/arch_hooks.h>
+#include <asm/pgalloc.h>
+#include <asm/atomic.h>
+#include <asm/mpspec.h>
+#include <asm/i8253.h>
+#include <asm/i8259.h>
+#include <asm/proto.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/hpet.h>
+#include <asm/idle.h>
+#include <asm/mtrr.h>
+#include <asm/smp.h>
+
+unsigned int num_processors;
+
+unsigned disabled_cpus __cpuinitdata;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+
+/*
+ * The highest APIC ID seen during enumeration.
+ *
+ * This determines the messaging protocol we can use: if all APIC IDs
+ * are in the 0 ... 7 range, then we can use logical addressing which
+ * has some performance advantages (better broadcasting).
+ *
+ * If there's an APIC ID of 8 or above, we use physical addressing.
+ */
+unsigned int max_physical_apicid;
+
+/*
+ * Bitmask of physically existing CPUs:
+ */
+physid_mask_t phys_cpu_present_map;
+
+/*
+ * Map cpu index to physical APIC ID
+ */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+#ifdef CONFIG_X86_32
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+       force_enable_local_apic = 1;
+       return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+       apic_calibrate_pmtmr = 1;
+       notsc_setup(NULL);
+       return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+#ifdef CONFIG_X86_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+static int x2apic_preenabled;
+static int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+       disable_x2apic = 1;
+       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+       return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
+
+unsigned long mp_lapic_addr;
+int disable_apic;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int disable_apic_timer __cpuinitdata;
+/* Local APIC timer works in C2 */
+int local_apic_timer_c2_ok;
+EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
+
+int first_system_vector = 0xfe;
+
+/*
+ * Debug level, exported for io_apic.c
+ */
+unsigned int apic_verbosity;
+
+int pic_mode;
+
+/* Have we found an MP table */
+int smp_found_config;
+
+static struct resource lapic_resource = {
+       .name = "Local APIC",
+       .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
+static unsigned int calibration_result;
+
+static int lapic_next_event(unsigned long delta,
+                           struct clock_event_device *evt);
+static void lapic_timer_setup(enum clock_event_mode mode,
+                             struct clock_event_device *evt);
+static void lapic_timer_broadcast(const struct cpumask *mask);
+static void apic_pm_activate(void);
+
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+       .name           = "lapic",
+       .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+                       | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+       .shift          = 32,
+       .set_mode       = lapic_timer_setup,
+       .set_next_event = lapic_next_event,
+       .broadcast      = lapic_timer_broadcast,
+       .rating         = 100,
+       .irq            = -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
+static unsigned long apic_phys;
+
+/*
+ * Get the LAPIC version
+ */
+static inline int lapic_get_version(void)
+{
+       return GET_APIC_VERSION(apic_read(APIC_LVR));
+}
+
+/*
+ * Check whether the APIC is integrated or a separate chip
+ */
+static inline int lapic_is_integrated(void)
+{
+#ifdef CONFIG_X86_64
+       return 1;
+#else
+       return APIC_INTEGRATED(lapic_get_version());
+#endif
+}
+
+/*
+ * Check whether this is a modern or a first-generation APIC
+ */
+static int modern_apic(void)
+{
+       /* AMD systems use old APIC versions, so check the CPU */
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+           boot_cpu_data.x86 >= 0xf)
+               return 1;
+       return lapic_get_version() >= 0x14;
+}
+
+void native_apic_wait_icr_idle(void)
+{
+       while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+               cpu_relax();
+}
+
+u32 native_safe_apic_wait_icr_idle(void)
+{
+       u32 send_status;
+       int timeout;
+
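+       /* Poll for up to ~100 ms: 1000 iterations of udelay(100). */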
+       timeout = 0;
+       do {
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+               if (!send_status)
+                       break;
+               udelay(100);
+       } while (timeout++ < 1000);
+
+       return send_status;
+}
+
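+/*
+ * The IPI is dispatched by the write to the low ICR word, which is why
+ * the destination has to be set up in ICR2 first.
+ */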
+void native_apic_icr_write(u32 low, u32 id)
+{
+       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+       apic_write(APIC_ICR, low);
+}
+
+u64 native_apic_icr_read(void)
+{
+       u32 icr1, icr2;
+
+       icr2 = apic_read(APIC_ICR2);
+       icr1 = apic_read(APIC_ICR);
+
+       return icr1 | ((u64)icr2 << 32);
+}
+
+/**
+ * enable_NMI_through_LVT0 - enable NMI through local vector table 0
+ */
+void __cpuinit enable_NMI_through_LVT0(void)
+{
+       unsigned int v;
+
+       /* unmask and set to NMI */
+       v = APIC_DM_NMI;
+
+       /* Level triggered for 82489DX (32bit mode) */
+       if (!lapic_is_integrated())
+               v |= APIC_LVT_LEVEL_TRIGGER;
+
+       apic_write(APIC_LVT0, v);
+}
+
+#ifdef CONFIG_X86_32
+/**
+ * get_physical_broadcast - get the physical broadcast APIC ID
+ */
+int get_physical_broadcast(void)
+{
+       return modern_apic() ? 0xff : 0xf;
+}
+#endif
+
+/**
+ * lapic_get_maxlvt - get the maximum number of local vector table entries
+ */
+int lapic_get_maxlvt(void)
+{
+       unsigned int v;
+
+       v = apic_read(APIC_LVR);
+       /*
+        * - we always have the APIC integrated in 64-bit mode
+        * - 82489DXs do not report # of LVT entries
+        */
+       return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+}
+
+/*
+ * Local APIC timer
+ */
+
+/* Clock divisor */
+#define APIC_DIVISOR 16
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clocks. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
+{
+       unsigned int lvtt_value, tmp_value;
+
+       lvtt_value = LOCAL_TIMER_VECTOR;
+       if (!oneshot)
+               lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+       if (!lapic_is_integrated())
+               lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+       if (!irqen)
+               lvtt_value |= APIC_LVT_MASKED;
+
+       apic_write(APIC_LVTT, lvtt_value);
+
+       /*
+        * Divide PICLK by 16
+        */
+       tmp_value = apic_read(APIC_TDCR);
+       apic_write(APIC_TDCR,
+               (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
+               APIC_TDR_DIV_16);
+
+       if (!oneshot)
+               apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
+}
+
+/*
+ * Setup extended LVT, AMD specific (K8, family 10h)
+ *
+ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+ * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ *
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs.
+ */
+
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
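+/*
+ * For example, offset 1 (IBS) maps to register APIC_EILVT0 + 0x10
+ * (APIC510); the value packs the mask into bit 16, the message type
+ * into bits 15:8 and the vector into bits 7:0.
+ */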
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+{
+       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
+
+       apic_write(reg, v);
+}
+
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+       setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+       return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+       setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+       return APIC_EILVT_LVTOFF_IBS;
+}
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
+
+/*
+ * Program the next event, relative to now
+ */
+static int lapic_next_event(unsigned long delta,
+                           struct clock_event_device *evt)
+{
+       apic_write(APIC_TMICT, delta);
+       return 0;
+}
+
+/*
+ * Setup the lapic timer in periodic or oneshot mode
+ */
+static void lapic_timer_setup(enum clock_event_mode mode,
+                             struct clock_event_device *evt)
+{
+       unsigned long flags;
+       unsigned int v;
+
+       /* Is the lapic used as a dummy for broadcast? */
+       if (evt->features & CLOCK_EVT_FEAT_DUMMY)
+               return;
+
+       local_irq_save(flags);
+
+       switch (mode) {
+       case CLOCK_EVT_MODE_PERIODIC:
+       case CLOCK_EVT_MODE_ONESHOT:
+               __setup_APIC_LVTT(calibration_result,
+                                 mode != CLOCK_EVT_MODE_PERIODIC, 1);
+               break;
+       case CLOCK_EVT_MODE_UNUSED:
+       case CLOCK_EVT_MODE_SHUTDOWN:
+               v = apic_read(APIC_LVTT);
+               v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+               apic_write(APIC_LVTT, v);
+               apic_write(APIC_TMICT, 0xffffffff);
+               break;
+       case CLOCK_EVT_MODE_RESUME:
+               /* Nothing to do here */
+               break;
+       }
+
+       local_irq_restore(flags);
+}
+
+/*
+ * Local APIC timer broadcast function
+ */
+static void lapic_timer_broadcast(const struct cpumask *mask)
+{
+#ifdef CONFIG_SMP
+       apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#endif
+}
+
+/*
+ * Setup the local APIC timer for this CPU. Copy the initialized values
+ * of the boot CPU and register the clock event in the framework.
+ */
+static void __cpuinit setup_APIC_timer(void)
+{
+       struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+
+       memcpy(levt, &lapic_clockevent, sizeof(*levt));
+       levt->cpumask = cpumask_of(smp_processor_id());
+
+       clockevents_register_device(levt);
+}
+
+/*
+ * In this function we calibrate the APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once, since we want the local timer
+ * irqs to be synchronous. CPUs connected to the same APIC bus have the
+ * very same bus frequency.
+ *
+ * This was previously done by reading the PIT/HPET and waiting for a wrap
+ * around to find out that a tick has elapsed. I have a box where the PIT
+ * readout is broken, so it never gets out of the wait loop again. This was
+ * also reported by others.
+ *
+ * Monitoring the jiffies value is inaccurate and the clockevents
+ * infrastructure allows us to do a simple substitution of the interrupt
+ * handler.
+ *
+ * The calibration routine also uses the pm_timer when possible, as the PIT
+ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
+ * back to normal later in the boot process).
+ */
+
+#define LAPIC_CAL_LOOPS                (HZ/10)
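+
+/*
+ * Worked example (illustrative numbers): LAPIC_CAL_LOOPS spans 100 ms.
+ * With a hypothetical 200 MHz bus clock and the divide-by-16 timer
+ * setup, TMCCT decrements at 12.5 MHz, so delta over the window is
+ * about 1,250,000 and calibration_result = delta * APIC_DIVISOR /
+ * LAPIC_CAL_LOOPS comes out at 200,000 bus clocks per tick (HZ=1000).
+ */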
+
+static __initdata int lapic_cal_loops = -1;
+static __initdata long lapic_cal_t1, lapic_cal_t2;
+static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
+static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+/*
+ * Temporary interrupt handler.
+ */
+static void __init lapic_cal_handler(struct clock_event_device *dev)
+{
+       unsigned long long tsc = 0;
+       long tapic = apic_read(APIC_TMCCT);
+       unsigned long pm = acpi_pm_read_early();
+
+       if (cpu_has_tsc)
+               rdtscll(tsc);
+
+       switch (lapic_cal_loops++) {
+       case 0:
+               lapic_cal_t1 = tapic;
+               lapic_cal_tsc1 = tsc;
+               lapic_cal_pm1 = pm;
+               lapic_cal_j1 = jiffies;
+               break;
+
+       case LAPIC_CAL_LOOPS:
+               lapic_cal_t2 = tapic;
+               lapic_cal_tsc2 = tsc;
+               if (pm < lapic_cal_pm1)
+                       pm += ACPI_PM_OVRRUN;
+               lapic_cal_pm2 = pm;
+               lapic_cal_j2 = jiffies;
+               break;
+       }
+}
+
+static int __init
+calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
+{
+       const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+       const long pm_thresh = pm_100ms / 100;
+       unsigned long mult;
+       u64 res;
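+       /*
+        * The ACPI PM timer ticks at 3.579545 MHz, so pm_100ms is
+        * ~357954 ticks and pm_thresh allows ~1% of deviation.
+        */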
+
+#ifndef CONFIG_X86_PM_TIMER
+       return -1;
+#endif
+
+       apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
+
+       /* Check if the PM timer is available */
+       if (!deltapm)
+               return -1;
+
+       mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+       if (deltapm > (pm_100ms - pm_thresh) &&
+           deltapm < (pm_100ms + pm_thresh)) {
+               apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
+               return 0;
+       }
+
+       res = (((u64)deltapm) *  mult) >> 22;
+       do_div(res, 1000000);
+       pr_warning("APIC calibration not consistent "
+                  "with PM-Timer: %ldms instead of 100ms\n", (long)res);
+
+       /* Correct the lapic counter value */
+       res = (((u64)(*delta)) * pm_100ms);
+       do_div(res, deltapm);
+       pr_info("APIC delta adjusted to PM-Timer: "
+               "%lu (%ld)\n", (unsigned long)res, *delta);
+       *delta = (long)res;
+
+       /* Correct the tsc counter value */
+       if (cpu_has_tsc) {
+               res = (((u64)(*deltatsc)) * pm_100ms);
+               do_div(res, deltapm);
+               apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
+                                         "PM-Timer: %lu (%ld)\n",
+                                       (unsigned long)res, *deltatsc);
+               *deltatsc = (long)res;
+       }
+
+       return 0;
+}
+
+static int __init calibrate_APIC_clock(void)
+{
+       struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+       void (*real_handler)(struct clock_event_device *dev);
+       unsigned long deltaj;
+       long delta, deltatsc;
+       int pm_referenced = 0;
+
+       local_irq_disable();
+
+       /* Replace the global interrupt handler */
+       real_handler = global_clock_event->event_handler;
+       global_clock_event->event_handler = lapic_cal_handler;
+
+       /*
+        * Setup the APIC counter to maximum. There is no way the lapic
+        * can underflow in the 100ms detection time frame
+        */
+       __setup_APIC_LVTT(0xffffffff, 0, 0);
+
+       /* Let the interrupts run */
+       local_irq_enable();
+
+       while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+               cpu_relax();
+
+       local_irq_disable();
+
+       /* Restore the real event handler */
+       global_clock_event->event_handler = real_handler;
+
+       /* Build delta t1-t2 as apic timer counts down */
+       delta = lapic_cal_t1 - lapic_cal_t2;
+       apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+
+       deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+
+       /* we trust the PM based calibration if possible */
+       pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+                                       &delta, &deltatsc);
+
+       /* Calculate the scaled math multiplication factor */
+       lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+                                      lapic_clockevent.shift);
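+       /*
+        * div_sc() yields (delta << shift) / interval_ns, i.e. the timer
+        * frequency in ticks per nanosecond as a fixed-point mult value.
+        */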
+       lapic_clockevent.max_delta_ns =
+               clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+       lapic_clockevent.min_delta_ns =
+               clockevent_delta2ns(0xF, &lapic_clockevent);
+
+       calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+
+       apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
+       apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+       apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
+                   calibration_result);
+
+       if (cpu_has_tsc) {
+               apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+                           "%ld.%04ld MHz.\n",
+                           (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+                           (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+       }
+
+       apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+                   "%u.%04u MHz.\n",
+                   calibration_result / (1000000 / HZ),
+                   calibration_result % (1000000 / HZ));
+
+       /*
+        * Do a sanity check on the APIC calibration result
+        */
+       if (calibration_result < (1000000 / HZ)) {
+               local_irq_enable();
+               pr_warning("APIC frequency too slow, disabling apic timer\n");
+               return -1;
+       }
+
+       levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+       /*
+        * PM timer calibration failed or was not turned on,
+        * so let's try APIC-timer-based calibration
+        */
+       if (!pm_referenced) {
+               apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+               /*
+                * Setup the apic timer manually
+                */
+               levt->event_handler = lapic_cal_handler;
+               lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+               lapic_cal_loops = -1;
+
+               /* Let the interrupts run */
+               local_irq_enable();
+
+               while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+                       cpu_relax();
+
+               /* Stop the lapic timer */
+               lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+
+               /* Jiffies delta */
+               deltaj = lapic_cal_j2 - lapic_cal_j1;
+               apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+
+               /* Check if the jiffies result is consistent */
+               if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
+                       apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+               else
+                       levt->features |= CLOCK_EVT_FEAT_DUMMY;
+       } else
+               local_irq_enable();
+
+       if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
+               pr_warning("APIC timer disabled due to verification failure\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
+void __init setup_boot_APIC_clock(void)
+{
+       /*
+        * The local apic timer can be disabled via the kernel
+        * commandline or from the CPU detection code. Register the lapic
+        * timer as a dummy clock event source on SMP systems, so the
+        * broadcast mechanism is used. On UP systems simply ignore it.
+        */
+       if (disable_apic_timer) {
+               pr_info("Disabling APIC timer\n");
+               /* No broadcast on UP! */
+               if (num_possible_cpus() > 1) {
+                       lapic_clockevent.mult = 1;
+                       setup_APIC_timer();
+               }
+               return;
+       }
+
+       apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+                   "calibrating APIC timer ...\n");
+
+       if (calibrate_APIC_clock()) {
+               /* No broadcast on UP! */
+               if (num_possible_cpus() > 1)
+                       setup_APIC_timer();
+               return;
+       }
+
+       /*
+        * If nmi_watchdog is set to IO_APIC, we need the
+        * PIT/HPET going.  Otherwise register lapic as a dummy
+        * device.
+        */
+       if (nmi_watchdog != NMI_IO_APIC)
+               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+       else
+               pr_warning("APIC timer registered as dummy,"
+                       " due to nmi_watchdog=%d!\n", nmi_watchdog);
+
+       /* Setup the lapic or request the broadcast */
+       setup_APIC_timer();
+}
+
+void __cpuinit setup_secondary_APIC_clock(void)
+{
+       setup_APIC_timer();
+}
+
+/*
+ * The guts of the apic timer interrupt
+ */
+static void local_apic_timer_interrupt(void)
+{
+       int cpu = smp_processor_id();
+       struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+
+       /*
+        * Normally we should not be here until the LAPIC has been initialized,
+        * but in some cases, like kdump, it's possible that a pending LAPIC
+        * timer interrupt from the previous kernel's context is delivered in
+        * the new kernel the moment interrupts are enabled.
+        *
+        * Interrupts are enabled early and the LAPIC is set up much later,
+        * hence it's possible that when we get here evt->event_handler is NULL.
+        * Check for event_handler being NULL and discard the interrupt as
+        * spurious.
+        */
+       if (!evt->event_handler) {
+               pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+               /* Switch it off */
+               lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+               return;
+       }
+
+       /*
+        * the NMI deadlock-detector uses this.
+        */
+       inc_irq_stat(apic_timer_irqs);
+
+       evt->event_handler(evt);
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+       struct pt_regs *old_regs = set_irq_regs(regs);
+
+       /*
+        * NOTE! We'd better ACK the irq immediately,
+        * because timer handling can be slow.
+        */
+       ack_APIC_irq();
+       /*
+        * update_process_times() expects us to have done irq_enter().
+        * Besides, if we don't, timer interrupts ignore the global
+        * interrupt lock, which is the WrongThing (tm) to do.
+        */
+       exit_idle();
+       irq_enter();
+       local_apic_timer_interrupt();
+       irq_exit();
+
+       set_irq_regs(old_regs);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+       return -EINVAL;
+}
+
+/*
+ * Local APIC start and shutdown
+ */
+
+/**
+ * clear_local_APIC - shutdown the local APIC
+ *
+ * This is called when a CPU is disabled and before rebooting, so the state of
+ * the local APIC has no dangling leftovers. Also used to clean out any BIOS
+ * leftovers during boot.
+ */
+void clear_local_APIC(void)
+{
+       int maxlvt;
+       u32 v;
+
+       /* APIC hasn't been mapped yet */
+       if (!apic_phys)
+               return;
+
+       maxlvt = lapic_get_maxlvt();
+       /*
+        * Masking an LVT entry can trigger a local APIC error
+        * if the vector is zero. Mask LVTERR first to prevent this.
+        */
+       if (maxlvt >= 3) {
+               v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
+               apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+       }
+       /*
+        * Careful: we have to set masks only first to deassert
+        * any level-triggered sources.
+        */
+       v = apic_read(APIC_LVTT);
+       apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+       v = apic_read(APIC_LVT0);
+       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+       v = apic_read(APIC_LVT1);
+       apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
+       if (maxlvt >= 4) {
+               v = apic_read(APIC_LVTPC);
+               apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
+       }
+
+       /* let's not touch this if we didn't frob it */
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+       if (maxlvt >= 5) {
+               v = apic_read(APIC_LVTTHMR);
+               apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+       }
+#endif
+       /*
+        * Clean APIC state for other OSs:
+        */
+       apic_write(APIC_LVTT, APIC_LVT_MASKED);
+       apic_write(APIC_LVT0, APIC_LVT_MASKED);
+       apic_write(APIC_LVT1, APIC_LVT_MASKED);
+       if (maxlvt >= 3)
+               apic_write(APIC_LVTERR, APIC_LVT_MASKED);
+       if (maxlvt >= 4)
+               apic_write(APIC_LVTPC, APIC_LVT_MASKED);
+
+       /* Integrated APIC (!82489DX) ? */
+       if (lapic_is_integrated()) {
+               if (maxlvt > 3)
+                       /* Clear ESR due to Pentium errata 3AP and 11AP */
+                       apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+       unsigned int value;
+
+       /* APIC hasn't been mapped yet */
+       if (!apic_phys)
+               return;
+
+       clear_local_APIC();
+
+       /*
+        * Disable APIC (implies clearing of registers
+        * for 82489DX!).
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_SPIV_APIC_ENABLED;
+       apic_write(APIC_SPIV, value);
+
+#ifdef CONFIG_X86_32
+       /*
+        * When LAPIC was disabled by the BIOS and enabled by the kernel,
+        * restore the disabled state.
+        */
+       if (enabled_via_apicbase) {
+               unsigned int l, h;
+
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               l &= ~MSR_IA32_APICBASE_ENABLE;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+       }
+#endif
+}
+
+/*
+ * If Linux enabled the LAPIC against the BIOS default, disable it again before
+ * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
+ * not power-off.  Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
+ */
+void lapic_shutdown(void)
+{
+       unsigned long flags;
+
+       if (!cpu_has_apic)
+               return;
+
+       local_irq_save(flags);
+
+#ifdef CONFIG_X86_32
+       if (!enabled_via_apicbase)
+               clear_local_APIC();
+       else
+#endif
+               disable_local_APIC();
+
+       local_irq_restore(flags);
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+       unsigned int reg0, reg1;
+
+       /*
+        * The version register is read-only in a real APIC.
+        */
+       reg0 = apic_read(APIC_LVR);
+       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
+       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+       reg1 = apic_read(APIC_LVR);
+       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
+
+       /*
+        * The two version reads above should print the same
+        * numbers.  If the second one is different, then we
+        * poke at a non-APIC.
+        */
+       if (reg1 != reg0)
+               return 0;
+
+       /*
+        * Check if the version looks reasonable.
+        */
+       reg1 = GET_APIC_VERSION(reg0);
+       if (reg1 == 0x00 || reg1 == 0xff)
+               return 0;
+       reg1 = lapic_get_maxlvt();
+       if (reg1 < 0x02 || reg1 == 0xff)
+               return 0;
+
+       /*
+        * The ID register is read/write in a real APIC.
+        */
+       reg0 = apic_read(APIC_ID);
+       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
+       apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
+       reg1 = apic_read(APIC_ID);
+       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
+       apic_write(APIC_ID, reg0);
+       if (reg1 != (reg0 ^ apic->apic_id_mask))
+               return 0;
+
+       /*
+        * The next two are just to see if we have sane values.
+        * They're only really relevant if we're in Virtual Wire
+        * compatibility mode, but most boxes are these days.
+        */
+       reg0 = apic_read(APIC_LVT0);
+       apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
+       reg1 = apic_read(APIC_LVT1);
+       apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
+
+       return 1;
+}
+
+/**
+ * sync_Arb_IDs - synchronize APIC bus arbitration IDs
+ */
+void __init sync_Arb_IDs(void)
+{
+       /*
+        * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1. Not
+        * needed on AMD either.
+        */
+       if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+               return;
+
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+
+       apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
+       apic_write(APIC_ICR, APIC_DEST_ALLINC |
+                       APIC_INT_LEVELTRIG | APIC_DM_INIT);
+}
+
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+       unsigned int value;
+
+       /*
+        * Don't do the setup now if we have an SMP BIOS as the
+        * through-I/O-APIC virtual wire mode might be active.
+        */
+       if (smp_found_config || !cpu_has_apic)
+               return;
+
+       /*
+        * Do not trust the local APIC being empty at bootup.
+        */
+       clear_local_APIC();
+
+       /*
+        * Enable APIC.
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_VECTOR_MASK;
+       value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+       /* This bit is reserved on P4/Xeon and should be cleared */
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+           (boot_cpu_data.x86 == 15))
+               value &= ~APIC_SPIV_FOCUS_DISABLED;
+       else
+#endif
+               value |= APIC_SPIV_FOCUS_DISABLED;
+       value |= SPURIOUS_APIC_VECTOR;
+       apic_write(APIC_SPIV, value);
+
+       /*
+        * Set up the virtual wire mode.
+        */
+       apic_write(APIC_LVT0, APIC_DM_EXTINT);
+       value = APIC_DM_NMI;
+       if (!lapic_is_integrated())             /* 82489DX */
+               value |= APIC_LVT_LEVEL_TRIGGER;
+       apic_write(APIC_LVT1, value);
+}
+
+static void __cpuinit lapic_setup_esr(void)
+{
+       unsigned int oldvalue, value, maxlvt;
+
+       if (!lapic_is_integrated()) {
+               pr_info("No ESR for 82489DX.\n");
+               return;
+       }
+
+       if (apic->disable_esr) {
+               /*
+                * Something untraceable is creating bad interrupts on
+                * secondary quads ... for the moment, just leave the
+                * ESR disabled - we can't do anything useful with the
+                * errors anyway - mbligh
+                */
+               pr_info("Leaving ESR disabled.\n");
+               return;
+       }
+
+       maxlvt = lapic_get_maxlvt();
+       if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+               apic_write(APIC_ESR, 0);
+       oldvalue = apic_read(APIC_ESR);
+
+       /* enables sending errors */
+       value = ERROR_APIC_VECTOR;
+       apic_write(APIC_LVTERR, value);
+
+       /*
+        * The spec says to clear errors after enabling the vector.
+        */
+       if (maxlvt > 3)
+               apic_write(APIC_ESR, 0);
+       value = apic_read(APIC_ESR);
+       if (value != oldvalue)
+               apic_printk(APIC_VERBOSE, "ESR value before enabling "
+                       "vector: 0x%08x  after: 0x%08x\n",
+                       oldvalue, value);
+}
+
+
+/**
+ * setup_local_APIC - setup the local APIC
+ */
+void __cpuinit setup_local_APIC(void)
+{
+       unsigned int value;
+       int i, j;
+
+       if (disable_apic) {
+               arch_disable_smp_support();
+               return;
+       }
+
+#ifdef CONFIG_X86_32
+       /* Pound the ESR really hard over the head with a big hammer - mbligh */
+       if (lapic_is_integrated() && apic->disable_esr) {
+               apic_write(APIC_ESR, 0);
+               apic_write(APIC_ESR, 0);
+               apic_write(APIC_ESR, 0);
+               apic_write(APIC_ESR, 0);
+       }
+#endif
+
+       preempt_disable();
+
+       /*
+        * Double-check whether this APIC is really registered.
+        * This is meaningless in clustered apic mode, so we skip it.
+        */
+       if (!apic->apic_id_registered())
+               BUG();
+
+       /*
+        * Intel recommends setting DFR, LDR and TPR before enabling
+        * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+        * document number 292116).  So here it goes...
+        */
+       apic->init_apic_ldr();
+
+       /*
+        * Set Task Priority to 'accept all'. We never change this
+        * later on.
+        */
+       value = apic_read(APIC_TASKPRI);
+       value &= ~APIC_TPRI_MASK;
+       apic_write(APIC_TASKPRI, value);
+
+       /*
+        * After a crash, we no longer service the interrupts and a pending
+        * interrupt from previous kernel might still have ISR bit set.
+        *
+        * Most probably by now the CPU has serviced that pending interrupt
+        * and it might not have done the ack_APIC_irq() because it thought
+        * the interrupt came from the i8259 as ExtInt. The LAPIC did not get
+        * an EOI, so it does not clear the ISR bit and the CPU thinks it has
+        * already serviced the interrupt. Hence a vector might get locked.
+        * It was noticed for the timer irq (vector 0x31). Issue an extra EOI
+        * to clear the ISR.
+        */
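+       /* Each EOI write clears the highest-priority set ISR bit. */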
+       for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+               value = apic_read(APIC_ISR + i*0x10);
+               for (j = 31; j >= 0; j--) {
+                       if (value & (1<<j))
+                               ack_APIC_irq();
+               }
+       }
+
+       /*
+        * Now that we are all set up, enable the APIC
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_VECTOR_MASK;
+       /*
+        * Enable APIC
+        */
+       value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+       /*
+        * Some unknown Intel IO/APIC (or APIC) erratum is biting us with
+        * certain networking cards. If high frequency interrupts are
+        * happening on a particular IOAPIC pin, plus the IOAPIC routing
+        * entry is masked/unmasked at a high rate as well then sooner or
+        * later IOAPIC line gets 'stuck', no more interrupts are received
+        * from the device. If focus CPU is disabled then the hang goes
+        * away, oh well :-(
+        *
+        * [ This bug can be reproduced easily with a level-triggered
+        *   PCI Ne2000 networking cards and PII/PIII processors, dual
+        *   BX chipset. ]
+        */
+       /*
+        * Actually disabling the focus CPU check just makes the hang less
+        * frequent as it makes the interrupt distribution model more
+        * like LRU than MRU (the short-term load is more even across CPUs).
+        * See also the comment in end_level_ioapic_irq().  --macro
+        */
+
+       /*
+        * - enable focus processor (bit==0)
+        * - 64-bit mode always uses processor focus,
+        *   so there is no need to set it
+        */
+       value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
+
+       /*
+        * Set spurious IRQ vector
+        */
+       value |= SPURIOUS_APIC_VECTOR;
+       apic_write(APIC_SPIV, value);
+
+       /*
+        * Set up LVT0, LVT1:
+        *
+        * set up through-local-APIC on the BP's LINT0. This is not
+        * strictly necessary in pure symmetric-IO mode, but sometimes
+        * we delegate interrupts to the 8259A.
+        */
+       /*
+        * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+        */
+       value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+       if (!smp_processor_id() && (pic_mode || !value)) {
+               value = APIC_DM_EXTINT;
+               apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
+                               smp_processor_id());
+       } else {
+               value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+               apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
+                               smp_processor_id());
+       }
+       apic_write(APIC_LVT0, value);
+
+       /*
+        * only the BP should see the LINT1 NMI signal, obviously.
+        */
+       if (!smp_processor_id())
+               value = APIC_DM_NMI;
+       else
+               value = APIC_DM_NMI | APIC_LVT_MASKED;
+       if (!lapic_is_integrated())             /* 82489DX */
+               value |= APIC_LVT_LEVEL_TRIGGER;
+       apic_write(APIC_LVT1, value);
+
+       preempt_enable();
+}
+
+void __cpuinit end_local_APIC_setup(void)
+{
+       lapic_setup_esr();
+
+#ifdef CONFIG_X86_32
+       {
+               unsigned int value;
+               /* Disable the local apic timer */
+               value = apic_read(APIC_LVTT);
+               value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+               apic_write(APIC_LVTT, value);
+       }
+#endif
+
+       setup_apic_nmi_watchdog(NULL);
+       apic_pm_activate();
+}
+
+#ifdef CONFIG_X86_X2APIC
+void check_x2apic(void)
+{
+       int msr, msr2;
+
+       if (!cpu_has_x2apic)
+               return;
+
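+       /* X2APIC_ENABLE is the EXTD bit (bit 10) of IA32_APICBASE. */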
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+       if (msr & X2APIC_ENABLE) {
+               pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
+               x2apic_preenabled = x2apic = 1;
+       }
+}
+
+void enable_x2apic(void)
+{
+       int msr, msr2;
+
+       if (!x2apic)
+               return;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+       if (!(msr & X2APIC_ENABLE)) {
+               pr_info("Enabling x2apic\n");
+               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+       }
+}
+
+void __init enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+       int ret;
+       unsigned long flags;
+
+       if (!cpu_has_x2apic)
+               return;
+
+       if (!x2apic_preenabled && disable_x2apic) {
+               pr_info("Skipped enabling x2apic and Interrupt-remapping "
+                       "because of nox2apic\n");
+               return;
+       }
+
+       if (x2apic_preenabled && disable_x2apic)
+               panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+       if (!x2apic_preenabled && skip_ioapic_setup) {
+               pr_info("Skipped enabling x2apic and Interrupt-remapping "
+                       "because of skipping io-apic setup\n");
+               return;
+       }
+
+       ret = dmar_table_init();
+       if (ret) {
+               pr_info("dmar_table_init() failed with %d:\n", ret);
+
+               if (x2apic_preenabled)
+                       panic("x2apic enabled by bios. But IR enabling failed");
+               else
+                       pr_info("Not enabling x2apic, Intr-remapping\n");
+               return;
+       }
+
+       local_irq_save(flags);
+       mask_8259A();
+
+       ret = save_mask_IO_APIC_setup();
+       if (ret) {
+               pr_info("Saving IO-APIC state failed: %d\n", ret);
+               goto end;
+       }
+
+       ret = enable_intr_remapping(1);
+
+       if (ret && x2apic_preenabled) {
+               local_irq_restore(flags);
+               panic("x2apic enabled by bios. But IR enabling failed");
+       }
+
+       if (ret)
+               goto end_restore;
+
+       if (!x2apic) {
+               x2apic = 1;
+               enable_x2apic();
+       }
+
+end_restore:
+       if (ret)
+               /*
+                * IR enabling failed
+                */
+               restore_IO_APIC_setup();
+       else
+               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+end:
+       unmask_8259A();
+       local_irq_restore(flags);
+
+       if (!ret) {
+               if (!x2apic_preenabled)
+                       pr_info("Enabled x2apic and interrupt-remapping\n");
+               else
+                       pr_info("Enabled Interrupt-remapping\n");
+       } else
+               pr_err("Failed to enable Interrupt-remapping and x2apic\n");
+#else
+       if (!cpu_has_x2apic)
+               return;
+
+       if (x2apic_preenabled)
+               panic("x2apic enabled prior to OS handover,"
+                     " enable CONFIG_INTR_REMAP");
+
+       pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping"
+               " and x2apic\n");
+#endif
+
+       return;
+}
+#endif /* CONFIG_X86_X2APIC */
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+       if (!cpu_has_apic) {
+               pr_info("No local APIC present\n");
+               return -1;
+       }
+
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+       boot_cpu_physical_apicid = 0;
+       return 0;
+}
+#else
+/*
+ * Detect and initialize APIC
+ */
+static int __init detect_init_APIC(void)
+{
+       u32 h, l, features;
+
+       /* Disabled by kernel option? */
+       if (disable_apic)
+               return -1;
+
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_AMD:
+               if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+                   (boot_cpu_data.x86 >= 15))
+                       break;
+               goto no_apic;
+       case X86_VENDOR_INTEL:
+               if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
+                   (boot_cpu_data.x86 == 5 && cpu_has_apic))
+                       break;
+               goto no_apic;
+       default:
+               goto no_apic;
+       }
+
+       if (!cpu_has_apic) {
+               /*
+                * Over-ride BIOS and try to enable the local APIC only if
+                * "lapic" specified.
+                */
+               if (!force_enable_local_apic) {
+                       pr_info("Local APIC disabled by BIOS -- "
+                               "you can enable it with \"lapic\"\n");
+                       return -1;
+               }
+               /*
+                * Some BIOSes disable the local APIC in the APIC_BASE
+                * MSR. This can only be done in software for Intel P6 or later
+                * and AMD K7 (Model > 1) or later.
+                */
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+                       pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+                       l &= ~MSR_IA32_APICBASE_BASE;
+                       l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+                       wrmsr(MSR_IA32_APICBASE, l, h);
+                       enabled_via_apicbase = 1;
+               }
+       }
+       /*
+        * The APIC feature bit should now be enabled
+        * in `cpuid'
+        */
+       features = cpuid_edx(1);
+       if (!(features & (1 << X86_FEATURE_APIC))) {
+               pr_warning("Could not enable APIC!\n");
+               return -1;
+       }
+       set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /* The BIOS may have set up the APIC at some other address */
+       rdmsr(MSR_IA32_APICBASE, l, h);
+       if (l & MSR_IA32_APICBASE_ENABLE)
+               mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+       pr_info("Found and enabled local APIC!\n");
+
+       apic_pm_activate();
+
+       return 0;
+
+no_apic:
+       pr_info("No local APIC present or hardware disabled\n");
+       return -1;
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+       unsigned long phys_addr;
+
+       /*
+        * If no local APIC can be found then bail out:
+        * it means there is neither an MP table nor an MADT.
+        */
+       if (!smp_found_config)
+               return;
+
+       phys_addr = mp_lapic_addr;
+
+       set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+                   APIC_BASE, phys_addr);
+
+       /*
+        * Fetch the APIC ID of the BSP in case we have a
+        * default configuration (or the MP table is broken).
+        */
+       boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
+
+/**
+ * init_apic_mappings - initialize APIC mappings
+ */
+void __init init_apic_mappings(void)
+{
+#ifdef CONFIG_X86_X2APIC
+       if (x2apic) {
+               boot_cpu_physical_apicid = read_apic_id();
+               return;
+       }
+#endif
+
+       /*
+        * If no local APIC can be found then set up a fake all
+        * zeroes page to simulate the local APIC and another
+        * one for the IO-APIC.
+        */
+       if (!smp_found_config && detect_init_APIC()) {
+               apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+               apic_phys = __pa(apic_phys);
+       } else
+               apic_phys = mp_lapic_addr;
+
+       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+       apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+                               APIC_BASE, apic_phys);
+
+       /*
+        * Fetch the APIC ID of the BSP in case we have a
+        * default configuration (or the MP table is broken).
+        */
+       if (boot_cpu_physical_apicid == -1U)
+               boot_cpu_physical_apicid = read_apic_id();
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int apic_version[MAX_APICS];
+
+int __init APIC_init_uniprocessor(void)
+{
+       if (disable_apic) {
+               pr_info("APIC disabled\n");
+               return -1;
+       }
+#ifdef CONFIG_X86_64
+       if (!cpu_has_apic) {
+               disable_apic = 1;
+               pr_info("APIC disabled by BIOS\n");
+               return -1;
+       }
+#else
+       if (!smp_found_config && !cpu_has_apic)
+               return -1;
+
+       /*
+        * Complain if the BIOS pretends there is one.
+        */
+       if (!cpu_has_apic &&
+           APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+               pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+                       boot_cpu_physical_apicid);
+               clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+               return -1;
+       }
+#endif
+
+       enable_IR_x2apic();
+#ifdef CONFIG_X86_64
+       default_setup_apic_routing();
+#endif
+
+       verify_local_APIC();
+       connect_bsp_APIC();
+
+#ifdef CONFIG_X86_64
+       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
+       /*
+        * Hack: in case of kdump, after a crash, the kernel might be
+        * booting on a CPU with a non-zero lapic id. But boot_cpu_physical_apicid
+        * might be zero if read from MP tables. Get it from the LAPIC.
+        */
+# ifdef CONFIG_CRASH_DUMP
+       boot_cpu_physical_apicid = read_apic_id();
+# endif
+#endif
+       physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+       setup_local_APIC();
+
+#ifdef CONFIG_X86_IO_APIC
+       /*
+        * Now enable the IO-APICs; this calls clear_IO_APIC, which
+        * we need before enabling the error vector.
+        */
+       if (!skip_ioapic_setup && nr_ioapics)
+               enable_IO_APIC();
+#endif
+
+       end_local_APIC_setup();
+
+#ifdef CONFIG_X86_IO_APIC
+       if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+               setup_IO_APIC();
+       else {
+               nr_ioapics = 0;
+               localise_nmi_watchdog();
+       }
+#else
+       localise_nmi_watchdog();
+#endif
+
+       setup_boot_clock();
+#ifdef CONFIG_X86_64
+       check_nmi_watchdog();
+#endif
+
+       return 0;
+}
+
+/*
+ * Local APIC interrupts
+ */
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+void smp_spurious_interrupt(struct pt_regs *regs)
+{
+       u32 v;
+
+       exit_idle();
+       irq_enter();
+       /*
+        * Check if this really is a spurious interrupt and ACK it
+        * if it is a vectored one.  Just in case...
+        * Spurious interrupts should not be ACKed.
+        */
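+       /*
+        * The 256-bit ISR is spread across eight 32-bit registers spaced
+        * 16 bytes apart: vector >> 5 selects the register (hence the
+        * (vector & ~0x1f) >> 1 byte offset) and vector & 0x1f the bit.
+        */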
+       v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+       if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+               ack_APIC_irq();
+
+       inc_irq_stat(irq_spurious_count);
+
+       /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+       pr_info("spurious APIC interrupt on CPU#%d, "
+               "should never happen.\n", smp_processor_id());
+       irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+void smp_error_interrupt(struct pt_regs *regs)
+{
+       u32 v, v1;
+
+       exit_idle();
+       irq_enter();
+       /* First tickle the hardware, only then report what went on. -- REW */
+       v = apic_read(APIC_ESR);
+       apic_write(APIC_ESR, 0);
+       v1 = apic_read(APIC_ESR);
+       ack_APIC_irq();
+       atomic_inc(&irq_err_count);
+
+       /*
+        * Here is what the APIC error bits mean:
+        * 0: Send CS error
+        * 1: Receive CS error
+        * 2: Send accept error
+        * 3: Receive accept error
+        * 4: Reserved
+        * 5: Send illegal vector
+        * 6: Received illegal vector
+        * 7: Illegal register address
+        */
+       pr_debug("APIC error on CPU%d: %02x(%02x)\n",
+               smp_processor_id(), v, v1);
+       irq_exit();
+}
+
+/**
+ * connect_bsp_APIC - attach the APIC to the interrupt system
+ */
+void __init connect_bsp_APIC(void)
+{
+#ifdef CONFIG_X86_32
+       if (pic_mode) {
+               /*
+                * Do not trust the local APIC being empty at bootup.
+                */
+               clear_local_APIC();
+               /*
+                * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
+                * local APIC to INT and NMI lines.
+                */
+               apic_printk(APIC_VERBOSE, "leaving PIC mode, "
+                               "enabling APIC mode.\n");
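+               /*
+                * IMCR protocol (per the Intel MP spec): write 0x70 to
+                * port 0x22 to select the IMCR, then 0x01 to port 0x23
+                * to route INTR/NMI through the local APIC (0x00 would
+                * route them directly to the BSP, i.e. PIC mode).
+                */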
+               outb(0x70, 0x22);
+               outb(0x01, 0x23);
+       }
+#endif
+       if (apic->enable_apic_mode)
+               apic->enable_apic_mode();
+}
+
+/**
+ * disconnect_bsp_APIC - detach the APIC from the interrupt system
+ * @virt_wire_setup:   indicates whether virtual wire mode is selected
+ *
+ * Virtual wire mode is necessary to deliver legacy interrupts even when the
+ * APIC is disabled.
+ */
+void disconnect_bsp_APIC(int virt_wire_setup)
+{
+       unsigned int value;
+
+#ifdef CONFIG_X86_32
+       if (pic_mode) {
+               /*
+                * Put the board back into PIC mode (has an effect only on
+                * certain older boards).  Note that APIC interrupts, including
+                * IPIs, won't work beyond this point!  The only exception are
+                * INIT IPIs.
+                */
+               apic_printk(APIC_VERBOSE, "disabling APIC mode, "
+                               "entering PIC mode.\n");
+               outb(0x70, 0x22);
+               outb(0x00, 0x23);
+               return;
+       }
+#endif
+
+       /* Go back to Virtual Wire compatibility mode */
+
+       /* For the spurious interrupt use vector F, and enable it */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_VECTOR_MASK;
+       value |= APIC_SPIV_APIC_ENABLED;
+       value |= 0xf;
+       apic_write(APIC_SPIV, value);
+
+       if (!virt_wire_setup) {
+               /*
+                * For LVT0 make it edge triggered, active high,
+                * external and enabled
+                */
+               value = apic_read(APIC_LVT0);
+               value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+               apic_write(APIC_LVT0, value);
+       } else {
+               /* Disable LVT0 */
+               apic_write(APIC_LVT0, APIC_LVT_MASKED);
+       }
+
+       /*
+        * For LVT1 make it edge triggered, active high,
+        * nmi and enabled
+        */
+       value = apic_read(APIC_LVT1);
+       value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+       value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+       value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+       apic_write(APIC_LVT1, value);
+}
+
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+       int cpu;
+
+       /*
+        * Validate version
+        */
+       if (version == 0x0) {
+               pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
+                          "fixing up to 0x10. (tell your hw vendor)\n",
+                          apicid);
+               version = 0x10;
+       }
+       apic_version[apicid] = version;
+
+       if (num_processors >= nr_cpu_ids) {
+               int max = nr_cpu_ids;
+               int thiscpu = max + disabled_cpus;
+
+               pr_warning(
+                       "ACPI: NR_CPUS/possible_cpus limit of %i reached."
+                       "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+               disabled_cpus++;
+               return;
+       }
+
+       num_processors++;
+       cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+       if (version != apic_version[boot_cpu_physical_apicid])
+               WARN_ONCE(1,
+                       "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
+                       apic_version[boot_cpu_physical_apicid], cpu, version);
+
+       physid_set(apicid, phys_cpu_present_map);
+       if (apicid == boot_cpu_physical_apicid) {
+               /*
+                * x86_bios_cpu_apicid is required to have processors listed
+                * in same order as logical cpu numbers. Hence the first
+                * entry is BSP, and so on.
+                */
+               cpu = 0;
+       }
+       if (apicid > max_physical_apicid)
+               max_physical_apicid = apicid;
+
+#ifdef CONFIG_X86_32
+       /*
+        * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+        * but we need to work out other dependencies like SMP_SUSPEND etc.
+        * before this can be done without some confusion.
+        * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+        *       - Ashok Raj <ashok.raj@intel.com>
+        */
+       if (max_physical_apicid >= 8) {
+               switch (boot_cpu_data.x86_vendor) {
+               case X86_VENDOR_INTEL:
+                       if (!APIC_XAPIC(version)) {
+                               def_to_bigsmp = 0;
+                               break;
+                       }
+                       /* If P4 and above, fall through */
+               case X86_VENDOR_AMD:
+                       def_to_bigsmp = 1;
+               }
+       }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
+       early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+       early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+#endif
+
+       set_cpu_possible(cpu, true);
+       set_cpu_present(cpu, true);
+}
+
+int hard_smp_processor_id(void)
+{
+       return read_apic_id();
+}
+
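+/*
+ * Program flat logical destination mode: APIC_DFR selects the model
+ * and APIC_LDR gets a one-bit-per-CPU logical ID (the 1UL << cpu
+ * below), which logical-mode IPI destinations are matched against.
+ */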
+void default_init_apic_ldr(void)
+{
+       unsigned long val;
+
+       apic_write(APIC_DFR, APIC_DFR_VALUE);
+       val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+       val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+       apic_write(APIC_LDR, val);
+}
+
+#ifdef CONFIG_X86_32
+int default_apicid_to_node(int logical_apicid)
+{
+#ifdef CONFIG_SMP
+       return apicid_2_node[hard_smp_processor_id()];
+#else
+       return 0;
+#endif
+}
+#endif
+
+/*
+ * Power management
+ */
+#ifdef CONFIG_PM
+
+static struct {
+       /*
+        * 'active' is true if the local APIC was enabled by us and
+        * not the BIOS; this signifies that we are also responsible
+        * for disabling it before entering apm/acpi suspend
+        */
+       int active;
+       /* r/w apic fields */
+       unsigned int apic_id;
+       unsigned int apic_taskpri;
+       unsigned int apic_ldr;
+       unsigned int apic_dfr;
+       unsigned int apic_spiv;
+       unsigned int apic_lvtt;
+       unsigned int apic_lvtpc;
+       unsigned int apic_lvt0;
+       unsigned int apic_lvt1;
+       unsigned int apic_lvterr;
+       unsigned int apic_tmict;
+       unsigned int apic_tdcr;
+       unsigned int apic_thmr;
+} apic_pm_state;
+
+static int lapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+       unsigned long flags;
+       int maxlvt;
+
+       if (!apic_pm_state.active)
+               return 0;
+
+       maxlvt = lapic_get_maxlvt();
+
+       apic_pm_state.apic_id = apic_read(APIC_ID);
+       apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
+       apic_pm_state.apic_ldr = apic_read(APIC_LDR);
+       apic_pm_state.apic_dfr = apic_read(APIC_DFR);
+       apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
+       apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
+       if (maxlvt >= 4)
+               apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+       apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
+       apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
+       apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
+       apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
+       apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+       if (maxlvt >= 5)
+               apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
+
+       local_irq_save(flags);
+       disable_local_APIC();
+       local_irq_restore(flags);
+       return 0;
+}
+
+static int lapic_resume(struct sys_device *dev)
+{
+       unsigned int l, h;
+       unsigned long flags;
+       int maxlvt;
+
+       if (!apic_pm_state.active)
+               return 0;
+
+       maxlvt = lapic_get_maxlvt();
+
+       local_irq_save(flags);
+
+#ifdef CONFIG_X86_X2APIC
+       if (x2apic)
+               enable_x2apic();
+       else
+#endif
+       {
+               /*
+                * Make sure the APICBASE points to the right address
+                *
+                * FIXME! This will be wrong if we ever support suspend on
+                * SMP! We'll need to do this as part of the CPU restore!
+                */
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               l &= ~MSR_IA32_APICBASE_BASE;
+               l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+       }
+
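+       /*
+        * Mask LVTERR first so that restoring the other registers cannot
+        * raise error interrupts mid-restore; the saved LVTERR value is
+        * written back near the end, after the ESR has been cleared.
+        */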
+       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+       apic_write(APIC_ID, apic_pm_state.apic_id);
+       apic_write(APIC_DFR, apic_pm_state.apic_dfr);
+       apic_write(APIC_LDR, apic_pm_state.apic_ldr);
+       apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+       apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
+       apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
+       apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+       if (maxlvt >= 5)
+               apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+       if (maxlvt >= 4)
+               apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+       apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
+       apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
+       apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+       apic_write(APIC_ESR, 0);
+       apic_read(APIC_ESR);
+       apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+       apic_write(APIC_ESR, 0);
+       apic_read(APIC_ESR);
+
+       local_irq_restore(flags);
+
+       return 0;
+}
+
+/*
+ * This device has no shutdown method - fully functioning local APICs
+ * are needed on every CPU up until machine_halt/restart/poweroff.
+ */
+
+static struct sysdev_class lapic_sysclass = {
+       .name           = "lapic",
+       .resume         = lapic_resume,
+       .suspend        = lapic_suspend,
+};
+
+static struct sys_device device_lapic = {
+       .id     = 0,
+       .cls    = &lapic_sysclass,
+};
+
+static void __cpuinit apic_pm_activate(void)
+{
+       apic_pm_state.active = 1;
+}
+
+static int __init init_lapic_sysfs(void)
+{
+       int error;
+
+       if (!cpu_has_apic)
+               return 0;
+       /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+
+       error = sysdev_class_register(&lapic_sysclass);
+       if (!error)
+               error = sysdev_register(&device_lapic);
+       return error;
+}
+device_initcall(init_lapic_sysfs);
+
+#else  /* CONFIG_PM */
+
+static void apic_pm_activate(void) { }
+
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_X86_64
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+       int i, clusters, zeros;
+       unsigned id;
+       u16 *bios_cpu_apicid;
+       DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+       /*
+        * There is no such kind of box with AMD CPUs yet.
+        * Some AMD boxes with quad-core CPUs and 8 sockets have APIC IDs
+        * like [4, 0x23] or [8, 0x27] and could be mistaken for vSMP
+        * boxes, so those still need checking...
+        */
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+               return 0;
+
+       bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+       bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+       for (i = 0; i < nr_cpu_ids; i++) {
+               /* are we being called early in kernel startup? */
+               if (bios_cpu_apicid) {
+                       id = bios_cpu_apicid[i];
+               } else if (i < nr_cpu_ids) {
+                       if (cpu_present(i))
+                               id = per_cpu(x86_bios_cpu_apicid, i);
+                       else
+                               continue;
+               } else
+                       break;
+
+               if (id != BAD_APICID)
+                       __set_bit(APIC_CLUSTERID(id), clustermap);
+       }
+
+       /* Problem:  Partially populated chassis may not have CPUs in some of
+        * the APIC clusters they have been allocated.  Only present CPUs have
+        * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+        * Since clusters are allocated sequentially, count zeros only if
+        * they are bounded by ones.
+        */
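+       /*
+        * Worked example: with bits set for clusters 0 and 2 and a hole
+        * at 1, the loop below yields clusters = 3, since the interior
+        * zero is counted as an allocated-but-empty cluster.
+        */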
+       clusters = 0;
+       zeros = 0;
+       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+               if (test_bit(i, clustermap)) {
+                       clusters += 1 + zeros;
+                       zeros = 0;
+               } else
+                       ++zeros;
+       }
+
+       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+        * not guaranteed to be synced between boards
+        */
+       if (is_vsmp_box() && clusters > 1)
+               return 1;
+
+       /*
+        * If clusters > 2, then should be multi-chassis.
+        * May have to revisit this when multi-core + hyperthreaded CPUs come
+        * out, but AFAIK this will work even for them.
+        */
+       return (clusters > 2);
+}
+#endif
+
+/*
+ * APIC command line parameters
+ */
+static int __init setup_disableapic(char *arg)
+{
+       disable_apic = 1;
+       setup_clear_cpu_cap(X86_FEATURE_APIC);
+       return 0;
+}
+early_param("disableapic", setup_disableapic);
+
+/* same as disableapic, for compatibility */
+static int __init setup_nolapic(char *arg)
+{
+       return setup_disableapic(arg);
+}
+early_param("nolapic", setup_nolapic);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+       local_apic_timer_c2_ok = 1;
+       return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
+static int __init parse_disable_apic_timer(char *arg)
+{
+       disable_apic_timer = 1;
+       return 0;
+}
+early_param("noapictimer", parse_disable_apic_timer);
+
+static int __init parse_nolapic_timer(char *arg)
+{
+       disable_apic_timer = 1;
+       return 0;
+}
+early_param("nolapic_timer", parse_nolapic_timer);
+
+static int __init apic_set_verbosity(char *arg)
+{
+       if (!arg)  {
+#ifdef CONFIG_X86_64
+               skip_ioapic_setup = 0;
+               return 0;
+#endif
+               return -EINVAL;
+       }
+
+       if (strcmp("debug", arg) == 0)
+               apic_verbosity = APIC_DEBUG;
+       else if (strcmp("verbose", arg) == 0)
+               apic_verbosity = APIC_VERBOSE;
+       else {
+               pr_warning("APIC verbosity level %s not recognised,"
+                       " use apic=verbose or apic=debug\n", arg);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+early_param("apic", apic_set_verbosity);
+
+static int __init lapic_insert_resource(void)
+{
+       if (!apic_phys)
+               return -1;
+
+       /* Put local APIC into the resource map. */
+       lapic_resource.start = apic_phys;
+       lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+       insert_resource(&iomem_resource, &lapic_resource);
+
+       return 0;
+}
+
+/*
+ * We need to call insert_resource() after e820_reserve_resources(),
+ * which uses request_resource().
+ */
+late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apic/apic_64.c b/arch/x86/kernel/apic/apic_64.c
new file mode 100644 (file)
index 0000000..70935dd
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/setup.h>
+
+extern struct apic apic_flat;
+extern struct apic apic_physflat;
+extern struct apic apic_x2apic_uv_x;
+extern struct apic apic_x2apic_phys;
+extern struct apic apic_x2apic_cluster;
+
+struct apic __read_mostly *apic = &apic_flat;
+EXPORT_SYMBOL_GPL(apic);
+
+static struct apic *apic_probe[] __initdata = {
+#ifdef CONFIG_X86_UV
+       &apic_x2apic_uv_x,
+#endif
+#ifdef CONFIG_X86_X2APIC
+       &apic_x2apic_phys,
+       &apic_x2apic_cluster,
+#endif
+       &apic_physflat,
+       NULL,
+};
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init default_setup_apic_routing(void)
+{
+#ifdef CONFIG_X86_X2APIC
+       if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) {
+               if (!intr_remapping_enabled)
+                       apic = &apic_flat;
+       }
+#endif
+
+       if (apic == &apic_flat) {
+               if (max_physical_apicid >= 8)
+                       apic = &apic_physflat;
+               printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
+       }
+
+       if (x86_quirks->update_apic)
+               x86_quirks->update_apic();
+}
+
+/* Same for both flat and physical. */
+
+void apic_send_IPI_self(int vector)
+{
+       __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       int i;
+
+       for (i = 0; apic_probe[i]; ++i) {
+               if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+                       apic = apic_probe[i];
+                       printk(KERN_INFO "Setting APIC routing to %s.\n",
+                               apic->name);
+                       return 1;
+               }
+       }
+       return 0;
+}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
new file mode 100644 (file)
index 0000000..3b00299
--- /dev/null
@@ -0,0 +1,389 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Flat APIC subarch code.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+
+static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       return 1;
+}
+
+static const struct cpumask *flat_target_cpus(void)
+{
+       return cpu_online_mask;
+}
+
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+       /* Careful. Some cpus do not strictly honor the set of cpus
+        * specified in the interrupt destination when using lowest
+        * priority interrupt delivery mode.
+        *
+        * In particular there was a hyperthreading cpu observed to
+        * deliver interrupts to the wrong hyperthread when only one
+        * hyperthread was specified in the interrupt destination.
+        */
+       cpumask_clear(retmask);
+       cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends setting DFR, LDR and TPR before enabling
+ * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116).  So here it goes...
+ */
+static void flat_init_apic_ldr(void)
+{
+       unsigned long val;
+       unsigned long num, id;
+
+       num = smp_processor_id();
+       id = 1UL << num;
+       apic_write(APIC_DFR, APIC_DFR_FLAT);
+       val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+       val |= SET_APIC_LOGICAL_ID(id);
+       apic_write(APIC_LDR, val);
+}
+
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+       local_irq_restore(flags);
+}
+
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+       unsigned long mask = cpumask_bits(cpumask)[0];
+
+       _flat_send_IPI_mask(mask, vector);
+}
+
+static void
+ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+{
+       unsigned long mask = cpumask_bits(cpumask)[0];
+       int cpu = smp_processor_id();
+
+       if (cpu < BITS_PER_LONG)
+               clear_bit(cpu, &mask);
+
+       _flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_allbutself(int vector)
+{
+       int cpu = smp_processor_id();
+#ifdef CONFIG_HOTPLUG_CPU
+       int hotplug = 1;
+#else
+       int hotplug = 0;
+#endif
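+       /*
+        * With CPU hotplug (or for NMIs) the ALLBUT shortcut is avoided:
+        * it could also hit CPUs that are offline or on their way down,
+        * so an explicit mask excluding ourselves is built instead.
+        */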
+       if (hotplug || vector == NMI_VECTOR) {
+               if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+                       unsigned long mask = cpumask_bits(cpu_online_mask)[0];
+
+                       if (cpu < BITS_PER_LONG)
+                               clear_bit(cpu, &mask);
+
+                       _flat_send_IPI_mask(mask, vector);
+               }
+       } else if (num_online_cpus() > 1) {
+               __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
+                                           vector, apic->dest_logical);
+       }
+}
+
+static void flat_send_IPI_all(int vector)
+{
+       if (vector == NMI_VECTOR) {
+               flat_send_IPI_mask(cpu_online_mask, vector);
+       } else {
+               __default_send_IPI_shortcut(APIC_DEST_ALLINC,
+                                           vector, apic->dest_logical);
+       }
+}
+
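+/*
+ * The 8-bit xAPIC ID lives in bits 31:24 of the APIC_ID register:
+ * a register value of 0x03000000, for example, decodes to APIC ID 3.
+ */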
+static unsigned int flat_get_apic_id(unsigned long x)
+{
+       unsigned int id;
+
+       id = (((x)>>24) & 0xFFu);
+
+       return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       x = ((id & 0xFFu)<<24);
+       return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+       unsigned int id;
+
+       id = flat_get_apic_id(apic_read(APIC_ID));
+       return id;
+}
+
+static int flat_apic_id_registered(void)
+{
+       return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+       return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                                               const struct cpumask *andmask)
+{
+       unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+       unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
+
+       return mask1 & mask2;
+}
+
+static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+       return hard_smp_processor_id() >> index_msb;
+}
+
+struct apic apic_flat =  {
+       .name                           = "flat",
+       .probe                          = NULL,
+       .acpi_madt_oem_check            = flat_acpi_madt_oem_check,
+       .apic_id_registered             = flat_apic_id_registered,
+
+       .irq_delivery_mode              = dest_LowestPrio,
+       .irq_dest_mode                  = 1, /* logical */
+
+       .target_cpus                    = flat_target_cpus,
+       .disable_esr                    = 0,
+       .dest_logical                   = APIC_DEST_LOGICAL,
+       .check_apicid_used              = NULL,
+       .check_apicid_present           = NULL,
+
+       .vector_allocation_domain       = flat_vector_allocation_domain,
+       .init_apic_ldr                  = flat_init_apic_ldr,
+
+       .ioapic_phys_id_map             = NULL,
+       .setup_apic_routing             = NULL,
+       .multi_timer_check              = NULL,
+       .apicid_to_node                 = NULL,
+       .cpu_to_logical_apicid          = NULL,
+       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
+       .apicid_to_cpu_present          = NULL,
+       .setup_portio_remap             = NULL,
+       .check_phys_apicid_present      = default_check_phys_apicid_present,
+       .enable_apic_mode               = NULL,
+       .phys_pkg_id                    = flat_phys_pkg_id,
+       .mps_oem_check                  = NULL,
+
+       .get_apic_id                    = flat_get_apic_id,
+       .set_apic_id                    = set_apic_id,
+       .apic_id_mask                   = 0xFFu << 24,
+
+       .cpu_mask_to_apicid             = flat_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and         = flat_cpu_mask_to_apicid_and,
+
+       .send_IPI_mask                  = flat_send_IPI_mask,
+       .send_IPI_mask_allbutself       = flat_send_IPI_mask_allbutself,
+       .send_IPI_allbutself            = flat_send_IPI_allbutself,
+       .send_IPI_all                   = flat_send_IPI_all,
+       .send_IPI_self                  = apic_send_IPI_self,
+
+       .wakeup_cpu                     = NULL,
+       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
+       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+       .wait_for_init_deassert         = NULL,
+       .smp_callin_clear_local_apic    = NULL,
+       .inquire_remote_apic            = NULL,
+
+       .read                           = native_apic_mem_read,
+       .write                          = native_apic_mem_write,
+       .icr_read                       = native_apic_icr_read,
+       .icr_write                      = native_apic_icr_write,
+       .wait_icr_idle                  = native_apic_wait_icr_idle,
+       .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,
+};
+
+/*
+ * Physflat mode is used when there are more than 8 CPUs on an AMD system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+       /*
+        * Quirk: some x86_64 machines can only use physical APIC mode
+        * regardless of how many processors are present (x86_64 ES7000
+        * is an example).
+        */
+       if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+               (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+               printk(KERN_DEBUG "system APIC can only use physical flat\n");
+               return 1;
+       }
+#endif
+
+       return 0;
+}
+
+static const struct cpumask *physflat_target_cpus(void)
+{
+       return cpu_online_mask;
+}
+
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+       cpumask_clear(retmask);
+       cpumask_set_cpu(cpu, retmask);
+}
+
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+       default_send_IPI_mask_sequence_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+                                             int vector)
+{
+       default_send_IPI_mask_allbutself_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+       default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void physflat_send_IPI_all(int vector)
+{
+       physflat_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       cpu = cpumask_first(cpumask);
+       if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       else
+               return BAD_APICID;
+}
+
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                               const struct cpumask *andmask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       for_each_cpu_and(cpu, cpumask, andmask) {
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       break;
+       }
+       if (cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+
+       return BAD_APICID;
+}
+
+struct apic apic_physflat =  {
+
+       .name                           = "physical flat",
+       .probe                          = NULL,
+       .acpi_madt_oem_check            = physflat_acpi_madt_oem_check,
+       .apic_id_registered             = flat_apic_id_registered,
+
+       .irq_delivery_mode              = dest_Fixed,
+       .irq_dest_mode                  = 0, /* physical */
+
+       .target_cpus                    = physflat_target_cpus,
+       .disable_esr                    = 0,
+       .dest_logical                   = 0,
+       .check_apicid_used              = NULL,
+       .check_apicid_present           = NULL,
+
+       .vector_allocation_domain       = physflat_vector_allocation_domain,
+       /* not needed, but shouldn't hurt: */
+       .init_apic_ldr                  = flat_init_apic_ldr,
+
+       .ioapic_phys_id_map             = NULL,
+       .setup_apic_routing             = NULL,
+       .multi_timer_check              = NULL,
+       .apicid_to_node                 = NULL,
+       .cpu_to_logical_apicid          = NULL,
+       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
+       .apicid_to_cpu_present          = NULL,
+       .setup_portio_remap             = NULL,
+       .check_phys_apicid_present      = default_check_phys_apicid_present,
+       .enable_apic_mode               = NULL,
+       .phys_pkg_id                    = flat_phys_pkg_id,
+       .mps_oem_check                  = NULL,
+
+       .get_apic_id                    = flat_get_apic_id,
+       .set_apic_id                    = set_apic_id,
+       .apic_id_mask                   = 0xFFu << 24,
+
+       .cpu_mask_to_apicid             = physflat_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and         = physflat_cpu_mask_to_apicid_and,
+
+       .send_IPI_mask                  = physflat_send_IPI_mask,
+       .send_IPI_mask_allbutself       = physflat_send_IPI_mask_allbutself,
+       .send_IPI_allbutself            = physflat_send_IPI_allbutself,
+       .send_IPI_all                   = physflat_send_IPI_all,
+       .send_IPI_self                  = apic_send_IPI_self,
+
+       .wakeup_cpu                     = NULL,
+       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
+       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+       .wait_for_init_deassert         = NULL,
+       .smp_callin_clear_local_apic    = NULL,
+       .inquire_remote_apic            = NULL,
+
+       .read                           = native_apic_mem_read,
+       .write                          = native_apic_mem_write,
+       .icr_read                       = native_apic_icr_read,
+       .icr_write                      = native_apic_icr_write,
+       .wait_icr_idle                  = native_apic_wait_icr_idle,
+       .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
new file mode 100644 (file)
index 0000000..00e6071
--- /dev/null
@@ -0,0 +1,4160 @@
+/*
+ *     Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *     Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *
+ *     Many thanks to Stig Venaas for trying out countless experimental
+ *     patches and reporting/debugging problems patiently!
+ *
+ *     (c) 1999, Multiple IO-APIC support, developed by
+ *     Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ *     further tested and cleaned up by Zach Brown <zab@redhat.com>
+ *     and Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively
+ *     Paul Diefenbaugh        :       Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>     /* time_after() */
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+
+#include <asm/idle.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/acpi.h>
+#include <asm/dma.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/nmi.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>
+#include <asm/setup.h>
+#include <asm/irq_remapping.h>
+#include <asm/hpet.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
+
+#include <asm/apic.h>
+
+#define __apicdebuginit(type) static type __init
+
+/*
+ *      Is the SiS APIC rmw bug present?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/* I/O APIC entries */
+struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+int skip_ioapic_setup;
+
+void arch_disable_smp_support(void)
+{
+#ifdef CONFIG_PCI
+       noioapicquirk = 1;
+       noioapicreroute = -1;
+#endif
+       skip_ioapic_setup = 1;
+}
+
+static int __init parse_noapic(char *str)
+{
+       /* disable IO-APIC */
+       arch_disable_smp_support();
+       return 0;
+}
+early_param("noapic", parse_noapic);
+
+struct irq_pin_list;
+
+/*
+ * This is performance-critical; we want to do it in O(1).
+ *
+ * The indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+       struct irq_pin_list *pin;
+       int node;
+
+       node = cpu_to_node(cpu);
+
+       pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+
+       return pin;
+}
+
+struct irq_cfg {
+       struct irq_pin_list *irq_2_pin;
+       cpumask_var_t domain;
+       cpumask_var_t old_domain;
+       unsigned move_cleanup_count;
+       u8 vector;
+       u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+       u8 move_desc_pending : 1;
+#endif
+};
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
+#endif
+       [0]  = { .vector = IRQ0_VECTOR,  },
+       [1]  = { .vector = IRQ1_VECTOR,  },
+       [2]  = { .vector = IRQ2_VECTOR,  },
+       [3]  = { .vector = IRQ3_VECTOR,  },
+       [4]  = { .vector = IRQ4_VECTOR,  },
+       [5]  = { .vector = IRQ5_VECTOR,  },
+       [6]  = { .vector = IRQ6_VECTOR,  },
+       [7]  = { .vector = IRQ7_VECTOR,  },
+       [8]  = { .vector = IRQ8_VECTOR,  },
+       [9]  = { .vector = IRQ9_VECTOR,  },
+       [10] = { .vector = IRQ10_VECTOR, },
+       [11] = { .vector = IRQ11_VECTOR, },
+       [12] = { .vector = IRQ12_VECTOR, },
+       [13] = { .vector = IRQ13_VECTOR, },
+       [14] = { .vector = IRQ14_VECTOR, },
+       [15] = { .vector = IRQ15_VECTOR, },
+};
+
+int __init arch_early_irq_init(void)
+{
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       int count;
+       int i;
+
+       cfg = irq_cfgx;
+       count = ARRAY_SIZE(irq_cfgx);
+
+       for (i = 0; i < count; i++) {
+               desc = irq_to_desc(i);
+               desc->chip_data = &cfg[i];
+               alloc_bootmem_cpumask_var(&cfg[i].domain);
+               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+               if (i < NR_IRQS_LEGACY)
+                       cpumask_setall(cfg[i].domain);
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+       struct irq_cfg *cfg = NULL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       if (desc)
+               cfg = desc->chip_data;
+
+       return cfg;
+}
+
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+{
+       struct irq_cfg *cfg;
+       int node;
+
+       node = cpu_to_node(cpu);
+
+       cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+       if (cfg) {
+               if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+                       kfree(cfg);
+                       cfg = NULL;
+               } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+                                                         GFP_ATOMIC, node)) {
+                       free_cpumask_var(cfg->domain);
+                       kfree(cfg);
+                       cfg = NULL;
+               } else {
+                       cpumask_clear(cfg->domain);
+                       cpumask_clear(cfg->old_domain);
+               }
+       }
+
+       return cfg;
+}
+
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+       struct irq_cfg *cfg;
+
+       cfg = desc->chip_data;
+       if (!cfg) {
+               desc->chip_data = get_one_free_irq_cfg(cpu);
+               if (!desc->chip_data) {
+                       printk(KERN_ERR "cannot alloc irq_cfg\n");
+                       BUG_ON(1);
+               }
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+
+static void
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+{
+       struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+       cfg->irq_2_pin = NULL;
+       old_entry = old_cfg->irq_2_pin;
+       if (!old_entry)
+               return;
+
+       entry = get_one_free_irq_2_pin(cpu);
+       if (!entry)
+               return;
+
+       entry->apic     = old_entry->apic;
+       entry->pin      = old_entry->pin;
+       head            = entry;
+       tail            = entry;
+       old_entry       = old_entry->next;
+       while (old_entry) {
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       entry = head;
+                       while (entry) {
+                               head = entry->next;
+                               kfree(entry);
+                               entry = head;
+                       }
+                       /* still use the old one */
+                       return;
+               }
+               entry->apic     = old_entry->apic;
+               entry->pin      = old_entry->pin;
+               tail->next      = entry;
+               tail            = entry;
+               old_entry       = old_entry->next;
+       }
+
+       tail->next = NULL;
+       cfg->irq_2_pin = head;
+}
+
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+{
+       struct irq_pin_list *entry, *next;
+
+       if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+               return;
+
+       entry = old_cfg->irq_2_pin;
+
+       while (entry) {
+               next = entry->next;
+               kfree(entry);
+               entry = next;
+       }
+       old_cfg->irq_2_pin = NULL;
+}
+
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+                                struct irq_desc *desc, int cpu)
+{
+       struct irq_cfg *cfg;
+       struct irq_cfg *old_cfg;
+
+       cfg = get_one_free_irq_cfg(cpu);
+
+       if (!cfg)
+               return;
+
+       desc->chip_data = cfg;
+
+       old_cfg = old_desc->chip_data;
+
+       memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+       init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+       kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+       struct irq_cfg *old_cfg, *cfg;
+
+       old_cfg = old_desc->chip_data;
+       cfg = desc->chip_data;
+
+       if (old_cfg == cfg)
+               return;
+
+       if (old_cfg) {
+               free_irq_2_pin(old_cfg, cfg);
+               free_irq_cfg(old_cfg);
+               old_desc->chip_data = NULL;
+       }
+}
+
+static void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+       struct irq_cfg *cfg = desc->chip_data;
+
+       if (!cfg->move_in_progress) {
+               /* it means that domain is not changed */
+               if (!cpumask_intersects(desc->affinity, mask))
+                       cfg->move_desc_pending = 1;
+       }
+}
+#endif
+
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+#endif
+
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+static inline void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+}
+#endif
+
+struct io_apic {
+       unsigned int index;
+       unsigned int unused[3];
+       unsigned int data;
+};
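+
+/*
+ * Access is indirect, through an index/data window: write the register
+ * number to 'index' (offset 0x00), then read or write 'data' (offset
+ * 0x10). E.g. to read the IO-APIC version register (register 0x01):
+ *
+ *     writel(0x01, &io_apic->index);
+ *     ver = readl(&io_apic->data);
+ */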
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+       return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+               + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APICs require us to rewrite the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
+
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+{
+       struct irq_pin_list *entry;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       entry = cfg->irq_2_pin;
+       for (;;) {
+               unsigned int reg;
+               int pin;
+
+               if (!entry)
+                       break;
+               pin = entry->pin;
+               reg = io_apic_read(entry->apic, 0x10 + pin*2);
+               /* Is the remote IRR bit set? */
+               if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+                       spin_unlock_irqrestore(&ioapic_lock, flags);
+                       return true;
+               }
+               if (!entry->next)
+                       break;
+               entry = entry->next;
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return false;
+}
+
+union entry_union {
+       struct { u32 w1, w2; };
+       struct IO_APIC_route_entry entry;
+};
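+
+/*
+ * Each 64-bit redirection-table entry for 'pin' is accessed as two
+ * 32-bit registers: 0x10 + 2*pin (low word, w1) and 0x11 + 2*pin
+ * (high word, w2); the union converts between the two views.
+ */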
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+       union entry_union eu;
+       unsigned long flags;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+       eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       return eu.entry;
+}
+
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+       union entry_union eu;
+       eu.entry = e;
+       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __ioapic_write_entry(apic, pin, e);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+       unsigned long flags;
+       union entry_union eu = { .entry.mask = 1 };
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+       cpumask_var_t cleanup_mask;
+
+       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+               unsigned int i;
+               cfg->move_cleanup_count = 0;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       cfg->move_cleanup_count++;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+       } else {
+               cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               free_cpumask_var(cleanup_mask);
+       }
+       cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+       int apic, pin;
+       struct irq_pin_list *entry;
+       u8 vector = cfg->vector;
+
+       entry = cfg->irq_2_pin;
+       for (;;) {
+               unsigned int reg;
+
+               if (!entry)
+                       break;
+
+               apic = entry->apic;
+               pin = entry->pin;
+#ifdef CONFIG_INTR_REMAP
+               /*
+                * With interrupt-remapping, destination information comes
+                * from interrupt-remapping table entry.
+                */
+               if (!irq_remapped(irq))
+                       io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+               io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
+               reg = io_apic_read(apic, 0x10 + pin*2);
+               reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+               reg |= vector;
+               io_apic_modify(apic, 0x10 + pin*2, reg);
+               if (!entry->next)
+                       break;
+               entry = entry->next;
+       }
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value and returns the
+ * ->cpu_mask_to_apicid of that mask, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+       struct irq_cfg *cfg;
+       unsigned int irq;
+
+       if (!cpumask_intersects(mask, cpu_online_mask))
+               return BAD_APICID;
+
+       irq = desc->irq;
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
+               return BAD_APICID;
+
+       cpumask_and(desc->affinity, cfg->domain, mask);
+       set_extra_move_desc(desc, mask);
+
+       return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
+}
+
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+       struct irq_cfg *cfg;
+       unsigned long flags;
+       unsigned int dest;
+       unsigned int irq;
+
+       irq = desc->irq;
+       cfg = desc->chip_data;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       dest = set_desc_affinity(desc, mask);
+       if (dest != BAD_APICID) {
+               /* Only the high 8 bits are valid. */
+               dest = SET_APIC_LOGICAL_ID(dest);
+               __target_IO_APIC_irq(irq, dest, cfg);
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+
+       set_ioapic_affinity_irq_desc(desc, mask);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+{
+       struct irq_pin_list *entry;
+
+       entry = cfg->irq_2_pin;
+       if (!entry) {
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+                                       apic, pin);
+                       return;
+               }
+               cfg->irq_2_pin = entry;
+               entry->apic = apic;
+               entry->pin = pin;
+               return;
+       }
+
+       while (entry->next) {
+               /* not again, please */
+               if (entry->apic == apic && entry->pin == pin)
+                       return;
+
+               entry = entry->next;
+       }
+
+       entry->next = get_one_free_irq_2_pin(cpu);
+       entry = entry->next;
+       entry->apic = apic;
+       entry->pin = pin;
+}
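+
+/*
+ * Example: a dedicated PCI IRQ ends up with a single-element list, while
+ * an ISA IRQ shared between, say, pin 2 and pin 0 of IO-APIC 0 gets a
+ * chain:
+ *
+ *     cfg->irq_2_pin -> { apic 0, pin 2 } -> { apic 0, pin 0 } -> NULL
+ *
+ * Duplicate (apic, pin) pairs are rejected by the walk above.
+ */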
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+                                     int oldapic, int oldpin,
+                                     int newapic, int newpin)
+{
+       struct irq_pin_list *entry = cfg->irq_2_pin;
+       int replaced = 0;
+
+       while (entry) {
+               if (entry->apic == oldapic && entry->pin == oldpin) {
+                       entry->apic = newapic;
+                       entry->pin = newpin;
+                       replaced = 1;
+                       /* every one is different, right? */
+                       break;
+               }
+               entry = entry->next;
+       }
+
+       /* why? call replace before add? */
+       if (!replaced)
+               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+}
+
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
+                               int mask_and, int mask_or,
+                               void (*final)(struct irq_pin_list *entry))
+{
+       int pin;
+       struct irq_pin_list *entry;
+
+       for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+               unsigned int reg;
+               pin = entry->pin;
+               reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+               reg &= mask_and;
+               reg |= mask_or;
+               io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+               if (final)
+                       final(entry);
+       }
+}
+
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
+{
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
+
+#ifdef CONFIG_X86_64
+static void io_apic_sync(struct irq_pin_list *entry)
+{
+       /*
+        * Synchronize the IO-APIC and the CPU by doing
+        * a dummy read from the IO-APIC
+        */
+       struct io_apic __iomem *io_apic;
+       io_apic = io_apic_base(entry->apic);
+       readl(&io_apic->data);
+}
+
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+{
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+{
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
+{
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+                       IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
+{
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
+                       IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+}
+#endif /* CONFIG_X86_32 */
+
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
+{
+       struct irq_cfg *cfg = desc->chip_data;
+       unsigned long flags;
+
+       BUG_ON(!cfg);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __mask_IO_APIC_irq(cfg);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
+{
+       struct irq_cfg *cfg = desc->chip_data;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __unmask_IO_APIC_irq(cfg);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void mask_IO_APIC_irq(unsigned int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       unmask_IO_APIC_irq_desc(desc);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+       struct IO_APIC_route_entry entry;
+
+       /* Check delivery_mode to be sure we're not clearing an SMI pin */
+       entry = ioapic_read_entry(apic, pin);
+       if (entry.delivery_mode == dest_SMI)
+               return;
+       /*
+        * Disable it in the IO-APIC irq-routing table:
+        */
+       ioapic_mask_entry(apic, pin);
+}
+
+static void clear_IO_APIC(void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       clear_IO_APIC_pin(apic, pin);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries[MAX_PIRQS] = {
+       [0 ... MAX_PIRQS - 1] = -1
+};
+
+static int __init ioapic_pirq_setup(char *str)
+{
+       int i, max;
+       int ints[MAX_PIRQS+1];
+
+       get_options(str, ARRAY_SIZE(ints), ints);
+
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "PIRQ redirection, working around broken MP-BIOS.\n");
+       max = MAX_PIRQS;
+       if (ints[0] < MAX_PIRQS)
+               max = ints[0];
+
+       for (i = 0; i < max; i++) {
+               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               /*
+                * PIRQs are mapped upside down, usually.
+                */
+               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+       }
+       return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
+/*
+ * Saves and masks all the unmasked IO-APIC RTEs
+ */
+int save_mask_IO_APIC_setup(void)
+{
+       union IO_APIC_reg_01 reg_01;
+       unsigned long flags;
+       int apic, pin;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               early_ioapic_entries[apic] =
+                       kzalloc(sizeof(struct IO_APIC_route_entry) *
+                               nr_ioapic_registers[apic], GFP_KERNEL);
+               if (!early_ioapic_entries[apic])
+                       goto nomem;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+
+                       entry = early_ioapic_entries[apic][pin] =
+                               ioapic_read_entry(apic, pin);
+                       if (!entry.mask) {
+                               entry.mask = 1;
+                               ioapic_write_entry(apic, pin, entry);
+                       }
+               }
+
+       return 0;
+
+nomem:
+       while (apic >= 0)
+               kfree(early_ioapic_entries[apic--]);
+       /* clear the whole pointer array: sizeof, not ARRAY_SIZE */
+       memset(early_ioapic_entries, 0,
+               sizeof(early_ioapic_entries));
+
+       return -ENOMEM;
+}
+
+void restore_IO_APIC_setup(void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               if (!early_ioapic_entries[apic])
+                       break;
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       ioapic_write_entry(apic, pin,
+                                          early_ioapic_entries[apic][pin]);
+               kfree(early_ioapic_entries[apic]);
+               early_ioapic_entries[apic] = NULL;
+       }
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+       /*
+        * For now, a plain restore of the previous settings.
+        * TBD: once the OS enables interrupt-remapping, the IO-APIC RTEs
+        * need to be set up to point at interrupt-remapping table entries.
+        * Until then, do a plain restore and wait for setup_IO_APIC_irqs()
+        * to do the proper initialization.
+        */
+       restore_IO_APIC_setup();
+}
+#endif
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++)
+               if (mp_irqs[i].irqtype == type &&
+                   (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
+                    mp_irqs[i].dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].dstirq == pin)
+                       return i;
+
+       return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].srcbus;
+
+               if (test_bit(lbus, mp_bus_not_pci) &&
+                   (mp_irqs[i].irqtype == type) &&
+                   (mp_irqs[i].srcbusirq == irq))
+
+                       return mp_irqs[i].dstirq;
+       }
+       return -1;
+}
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].srcbus;
+
+               if (test_bit(lbus, mp_bus_not_pci) &&
+                   (mp_irqs[i].irqtype == type) &&
+                   (mp_irqs[i].srcbusirq == irq))
+                       break;
+       }
+       if (i < mp_irq_entries) {
+               int apic;
+               for (apic = 0; apic < nr_ioapics; apic++) {
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
+                               return apic;
+               }
+       }
+
+       return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+       int apic, i, best_guess = -1;
+
+       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+               bus, slot, pin);
+       if (test_bit(bus, mp_bus_not_pci)) {
+               apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+                           mp_irqs[i].dstapic == MP_APIC_ALL)
+                               break;
+
+               if (!test_bit(lbus, mp_bus_not_pci) &&
+                   !mp_irqs[i].irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].srcbusirq & 3))
+                               return irq;
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0)
+                               best_guess = irq;
+               }
+       }
+       return best_guess;
+}
+
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
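+
+/*
+ * Example: for a PCI source the MP-table srcbusirq field packs the device
+ * number in bits 2..6 and the interrupt pin in bits 0..1, as decoded
+ * above; a srcbusirq of 0x0d would thus mean slot 3, pin 1.
+ */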
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+       if (irq < NR_IRQS_LEGACY) {
+               unsigned int port = 0x4d0 + (irq >> 3);
+               return (inb(port) >> (irq & 7)) & 1;
+       }
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "Broken MPtable reports ISA irq %d\n", irq);
+       return 0;
+}
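+
+/*
+ * Example: the two ELCR ports each cover eight IRQs, so IRQ 10 is read
+ * from port 0x4d0 + (10 >> 3) = 0x4d1, bit 10 & 7 = 2; a set bit means
+ * the line is level triggered.
+ */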
+
+#endif
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)       (0)
+#define default_ISA_polarity(idx)      (0)
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].srcbusirq))
+#define default_EISA_polarity(idx)     default_ISA_polarity(idx)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)       (1)
+#define default_PCI_polarity(idx)      (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)       (1)
+#define default_MCA_polarity(idx)      default_ISA_polarity(idx)
+
+static int MPBIOS_polarity(int idx)
+{
+       int bus = mp_irqs[idx].srcbus;
+       int polarity;
+
+       /*
+        * Determine IRQ line polarity (high active or low active):
+        */
+       switch (mp_irqs[idx].irqflag & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent polarity */
+                       if (test_bit(bus, mp_bus_not_pci))
+                               polarity = default_ISA_polarity(idx);
+                       else
+                               polarity = default_PCI_polarity(idx);
+                       break;
+               case 1: /* high active */
+               {
+                       polarity = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+               case 3: /* low active */
+               {
+                       polarity = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+       }
+       return polarity;
+}
+
+static int MPBIOS_trigger(int idx)
+{
+       int bus = mp_irqs[idx].srcbus;
+       int trigger;
+
+       /*
+        * Determine IRQ trigger mode (edge or level sensitive):
+        */
+       switch ((mp_irqs[idx].irqflag>>2) & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent */
+                       if (test_bit(bus, mp_bus_not_pci))
+                               trigger = default_ISA_trigger(idx);
+                       else
+                               trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+                       switch (mp_bus_id_to_type[bus]) {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       /* set before the switch */
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       trigger = default_EISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       /* set before the switch */
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       trigger = default_MCA_trigger(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       trigger = 1;
+                                       break;
+                               }
+                       }
+#endif
+                       break;
+               case 1: /* edge */
+               {
+                       trigger = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 1;
+                       break;
+               }
+               case 3: /* level */
+               {
+                       trigger = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 0;
+                       break;
+               }
+       }
+       return trigger;
+}
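+
+/*
+ * Example: polarity lives in bits 0-1 of irqflag and the trigger mode in
+ * bits 2-3, so an MP-table entry with irqflag = 0x0f ((3 << 2) | 3)
+ * decodes as low active (polarity = 1) and level triggered (trigger = 1).
+ */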
+
+static inline int irq_polarity(int idx)
+{
+       return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+       return MPBIOS_trigger(idx);
+}
+
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+static int pin_2_irq(int idx, int apic, int pin)
+{
+       int irq, i;
+       int bus = mp_irqs[idx].srcbus;
+
+       /*
+        * Debugging check, we are in big trouble if this message pops up!
+        */
+       if (mp_irqs[idx].dstirq != pin)
+               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+       if (test_bit(bus, mp_bus_not_pci)) {
+               irq = mp_irqs[idx].srcbusirq;
+       } else {
+               /*
+                * PCI IRQs are mapped in order
+                */
+               i = irq = 0;
+               while (i < apic)
+                       irq += nr_ioapic_registers[i++];
+               irq += pin;
+               /*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+               if (ioapic_renumber_irq)
+                       irq = ioapic_renumber_irq(apic, irq);
+       }
+
+#ifdef CONFIG_X86_32
+       /*
+        * PCI IRQ command line redirection. Yes, limits are hardcoded.
+        */
+       if ((pin >= 16) && (pin <= 23)) {
+               if (pirq_entries[pin-16] != -1) {
+                       if (!pirq_entries[pin-16]) {
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "disabling PIRQ%d\n", pin-16);
+                       } else {
+                               irq = pirq_entries[pin-16];
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "using PIRQ%d -> IRQ %d\n",
+                                               pin-16, irq);
+                       }
+               }
+       }
+#endif
+
+       return irq;
+}
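+
+/*
+ * Example: with two IO-APICs of 24 pins each, a PCI interrupt on apic 1,
+ * pin 5 yields irq = 24 + 5 = 29, since the loop above sums the register
+ * counts of all preceding IO-APICs before adding the pin number.
+ */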
+
+void lock_vector_lock(void)
+{
+       /* Used to ensure that the online set of cpus does not change
+        * during assign_irq_vector.
+        */
+       spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+       spin_unlock(&vector_lock);
+}
+
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+       /*
+        * NOTE! The local APIC isn't very good at handling
+        * multiple interrupts at the same interrupt level.
+        * As the interrupt level is determined by taking the
+        * vector number and shifting that right by 4, we
+        * want to spread these out a bit so that they don't
+        * all fall in the same interrupt level.
+        *
+        * Also, we've got to be careful not to trash gate
+        * 0x80, because int 0x80 is hm, kind of importantish. ;)
+        */
+       static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+       unsigned int old_vector;
+       int cpu, err;
+       cpumask_var_t tmp_mask;
+
+       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+               return -EBUSY;
+
+       if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+               return -ENOMEM;
+
+       old_vector = cfg->vector;
+       if (old_vector) {
+               cpumask_and(tmp_mask, mask, cpu_online_mask);
+               cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+               if (!cpumask_empty(tmp_mask)) {
+                       free_cpumask_var(tmp_mask);
+                       return 0;
+               }
+       }
+
+       /* Only try and allocate irqs on cpus that are present */
+       err = -ENOSPC;
+       for_each_cpu_and(cpu, mask, cpu_online_mask) {
+               int new_cpu;
+               int vector, offset;
+
+               apic->vector_allocation_domain(cpu, tmp_mask);
+
+               vector = current_vector;
+               offset = current_offset;
+next:
+               vector += 8;
+               if (vector >= first_system_vector) {
+                       /* If out of vectors on large boxen, must share them. */
+                       offset = (offset + 1) % 8;
+                       vector = FIRST_DEVICE_VECTOR + offset;
+               }
+               if (unlikely(current_vector == vector))
+                       continue;
+
+               if (test_bit(vector, used_vectors))
+                       goto next;
+
+               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+                       if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+                               goto next;
+               /* Found one! */
+               current_vector = vector;
+               current_offset = offset;
+               if (old_vector) {
+                       cfg->move_in_progress = 1;
+                       cpumask_copy(cfg->old_domain, cfg->domain);
+               }
+               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+                       per_cpu(vector_irq, new_cpu)[vector] = irq;
+               cfg->vector = vector;
+               cpumask_copy(cfg->domain, tmp_mask);
+               err = 0;
+               break;
+       }
+       free_cpumask_var(tmp_mask);
+       return err;
+}
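+
+/*
+ * Example: since the interrupt level is vector >> 4, stepping the search
+ * by 8 puts at most two device vectors into each priority class before
+ * moving on; e.g. picks 0x41 and 0x49 would share class 4, while the
+ * next pick, 0x51, already falls into class 5.
+ */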
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+       int err;
+       unsigned long flags;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       err = __assign_irq_vector(irq, cfg, mask);
+       spin_unlock_irqrestore(&vector_lock, flags);
+       return err;
+}
+
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
+{
+       int cpu, vector;
+
+       BUG_ON(!cfg->vector);
+
+       vector = cfg->vector;
+       for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+               per_cpu(vector_irq, cpu)[vector] = -1;
+
+       cfg->vector = 0;
+       cpumask_clear(cfg->domain);
+
+       if (likely(!cfg->move_in_progress))
+               return;
+       for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+               for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+                                                               vector++) {
+                       if (per_cpu(vector_irq, cpu)[vector] != irq)
+                               continue;
+                       per_cpu(vector_irq, cpu)[vector] = -1;
+                       break;
+               }
+       }
+       cfg->move_in_progress = 0;
+}
+
+void __setup_vector_irq(int cpu)
+{
+       /* Initialize vector_irq on a new cpu */
+       /* This function must be called with vector_lock held */
+       int irq, vector;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+
+       /* Mark the inuse vectors */
+       for_each_irq_desc(irq, desc) {
+               cfg = desc->chip_data;
+               if (!cpumask_test_cpu(cpu, cfg->domain))
+                       continue;
+               vector = cfg->vector;
+               per_cpu(vector_irq, cpu)[vector] = irq;
+       }
+       /* Mark the free vectors */
+       for (vector = 0; vector < NR_VECTORS; ++vector) {
+               irq = per_cpu(vector_irq, cpu)[vector];
+               if (irq < 0)
+                       continue;
+
+               cfg = irq_cfg(irq);
+               if (!cpumask_test_cpu(cpu, cfg->domain))
+                       per_cpu(vector_irq, cpu)[vector] = -1;
+       }
+}
+
+static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
+
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       int apic, idx, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic, pin, mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
+         * nonexistent IRQs are edge default
+         */
+       return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       return 1;
+}
+#endif
+
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
+{
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+           trigger == IOAPIC_LEVEL)
+               desc->status |= IRQ_LEVEL;
+       else
+               desc->status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               desc->status |= IRQ_MOVE_PCNTXT;
+               if (trigger)
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_fasteoi_irq,
+                                                    "fasteoi");
+               else
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_edge_irq, "edge");
+               return;
+       }
+#endif
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+           trigger == IOAPIC_LEVEL)
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                             handle_fasteoi_irq,
+                                             "fasteoi");
+       else
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                             handle_edge_irq, "edge");
+}
+
+int setup_ioapic_entry(int apic_id, int irq,
+                      struct IO_APIC_route_entry *entry,
+                      unsigned int destination, int trigger,
+                      int polarity, int vector)
+{
+       /*
+        * add it to the IO-APIC irq-routing table:
+        */
+       memset(entry, 0, sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+       if (intr_remapping_enabled) {
+               struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
+               struct irte irte;
+               struct IR_IO_APIC_route_entry *ir_entry =
+                       (struct IR_IO_APIC_route_entry *) entry;
+               int index;
+
+               if (!iommu)
+                       panic("No mapping iommu for ioapic %d\n", apic_id);
+
+               index = alloc_irte(iommu, irq, 1);
+               if (index < 0)
+                       panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
+
+               memset(&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = apic->irq_dest_mode;
+               irte.trigger_mode = trigger;
+               irte.dlvry_mode = apic->irq_delivery_mode;
+               irte.vector = vector;
+               irte.dest_id = IRTE_DEST(destination);
+
+               modify_irte(irq, &irte);
+
+               ir_entry->index2 = (index >> 15) & 0x1;
+               ir_entry->zero = 0;
+               ir_entry->format = 1;
+               ir_entry->index = (index & 0x7fff);
+       } else
+#endif
+       {
+               entry->delivery_mode = apic->irq_delivery_mode;
+               entry->dest_mode = apic->irq_dest_mode;
+               entry->dest = destination;
+       }
+
+       entry->mask = 0;                                /* enable IRQ */
+       entry->trigger = trigger;
+       entry->polarity = polarity;
+       entry->vector = vector;
+
+       /* Mask level triggered irqs.
+        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+        */
+       if (trigger)
+               entry->mask = 1;
+       return 0;
+}
+
+static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
+                             int trigger, int polarity)
+{
+       struct irq_cfg *cfg;
+       struct IO_APIC_route_entry entry;
+       unsigned int dest;
+
+       if (!IO_APIC_IRQ(irq))
+               return;
+
+       cfg = desc->chip_data;
+
+       if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+               return;
+
+       dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+
+       apic_printk(APIC_VERBOSE, KERN_DEBUG
+                   "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+                   "IRQ %d Mode:%i Active:%i)\n",
+                   apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
+                   irq, trigger, polarity);
+
+
+       if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
+                              dest, trigger, polarity, cfg->vector)) {
+               printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+                      mp_ioapics[apic_id].apicid, pin);
+               __clear_irq_vector(irq, cfg);
+               return;
+       }
+
+       ioapic_register_intr(irq, desc, trigger);
+       if (irq < NR_IRQS_LEGACY)
+               disable_8259A_irq(irq);
+
+       ioapic_write_entry(apic_id, pin, entry);
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+       int apic_id, pin, idx, irq;
+       int notcon = 0;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
+
+       apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+
+                       idx = find_irq_entry(apic_id, pin, mp_INT);
+                       if (idx == -1) {
+                               if (!notcon) {
+                                       notcon = 1;
+                                       apic_printk(APIC_VERBOSE,
+                                               KERN_DEBUG " %d-%d",
+                                               mp_ioapics[apic_id].apicid, pin);
+                               } else
+                                       apic_printk(APIC_VERBOSE, " %d-%d",
+                                               mp_ioapics[apic_id].apicid, pin);
+                               continue;
+                       }
+                       if (notcon) {
+                               apic_printk(APIC_VERBOSE,
+                                       " (apicid-pin) not connected\n");
+                               notcon = 0;
+                       }
+
+                       irq = pin_2_irq(idx, apic_id, pin);
+
+                       /*
+                        * Skip the timer IRQ if there's a quirk handler
+                        * installed and if it returns 1:
+                        */
+                       if (apic->multi_timer_check &&
+                                       apic->multi_timer_check(apic_id, irq))
+                               continue;
+
+                       desc = irq_to_desc_alloc_cpu(irq, cpu);
+                       if (!desc) {
+                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+                               continue;
+                       }
+                       cfg = desc->chip_data;
+                       add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+
+                       setup_IO_APIC_irq(apic_id, pin, irq, desc,
+                                       irq_trigger(idx), irq_polarity(idx));
+               }
+       }
+
+       if (notcon)
+               apic_printk(APIC_VERBOSE,
+                       " (apicid-pin) not connected\n");
+}
+
+/*
+ * Set up the timer pin, possibly with the 8259A-master behind.
+ */
+static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
+                                       int vector)
+{
+       struct IO_APIC_route_entry entry;
+
+#ifdef CONFIG_INTR_REMAP
+       if (intr_remapping_enabled)
+               return;
+#endif
+
+       memset(&entry, 0, sizeof(entry));
+
+       /*
+        * We use logical delivery to get the timer IRQ
+        * to the first CPU.
+        */
+       entry.dest_mode = apic->irq_dest_mode;
+       entry.mask = 0;                 /* don't mask IRQ for edge */
+       entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
+       entry.delivery_mode = apic->irq_delivery_mode;
+       entry.polarity = 0;
+       entry.trigger = 0;
+       entry.vector = vector;
+
+       /*
+        * The timer IRQ doesn't have to know that behind the
+        * scene we may have a 8259A-master in AEOI mode ...
+        */
+       set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+
+       /*
+        * Add it to the IO-APIC irq-routing table:
+        */
+       ioapic_write_entry(apic_id, pin, entry);
+}
+
+
+__apicdebuginit(void) print_IO_APIC(void)
+{
+       int apic, i;
+       union IO_APIC_reg_00 reg_00;
+       union IO_APIC_reg_01 reg_01;
+       union IO_APIC_reg_02 reg_02;
+       union IO_APIC_reg_03 reg_03;
+       unsigned long flags;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       unsigned int irq;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+       for (i = 0; i < nr_ioapics; i++)
+               printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+                      mp_ioapics[i].apicid, nr_ioapic_registers[i]);
+
+       /*
+        * We are a bit conservative about what we expect.  We have to
+        * know about every hardware change ASAP.
+        */
+       printk(KERN_INFO "testing the IO APIC.......................\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(apic, 0);
+       reg_01.raw = io_apic_read(apic, 1);
+       if (reg_01.bits.version >= 0x10)
+               reg_02.raw = io_apic_read(apic, 2);
+       if (reg_01.bits.version >= 0x20)
+               reg_03.raw = io_apic_read(apic, 3);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       printk("\n");
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
+       printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+       printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
+
+       printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+       printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
+
+       printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
+       printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
+
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+        * but the value of reg_02 is read as the previous read register
+        * value, so ignore it if reg_02 == reg_01.
+        */
+       if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+               printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
+       }
+
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+        * or reg_03, but the value of reg_0[23] is read as the previous read
+        * register value, so ignore it if reg_03 == reg_0[12].
+        */
+       if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+           reg_03.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+               printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+       }
+
+       printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+       printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+                         " Stat Dmod Deli Vect:   \n");
+
+       for (i = 0; i <= reg_01.bits.entries; i++) {
+               struct IO_APIC_route_entry entry;
+
+               entry = ioapic_read_entry(apic, i);
+
+               printk(KERN_DEBUG " %02x %03X ",
+                       i,
+                       entry.dest
+               );
+
+               printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+                       entry.mask,
+                       entry.trigger,
+                       entry.irr,
+                       entry.polarity,
+                       entry.delivery_status,
+                       entry.dest_mode,
+                       entry.delivery_mode,
+                       entry.vector
+               );
+       }
+       }
+       printk(KERN_DEBUG "IRQ to pin mappings:\n");
+       for_each_irq_desc(irq, desc) {
+               struct irq_pin_list *entry;
+
+               cfg = desc->chip_data;
+               entry = cfg->irq_2_pin;
+               if (!entry)
+                       continue;
+               printk(KERN_DEBUG "IRQ%d ", irq);
+               for (;;) {
+                       printk("-> %d:%d", entry->apic, entry->pin);
+                       if (!entry->next)
+                               break;
+                       entry = entry->next;
+               }
+               printk("\n");
+       }
+
+       printk(KERN_INFO ".................................... done.\n");
+
+       return;
+}
+
+__apicdebuginit(void) print_APIC_bitfield(int base)
+{
+       unsigned int v;
+       int i, j;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+       for (i = 0; i < 8; i++) {
+               v = apic_read(base + i*0x10);
+               for (j = 0; j < 32; j++) {
+                       if (v & (1<<j))
+                               printk("1");
+                       else
+                               printk("0");
+               }
+               printk("\n");
+       }
+}
+
+__apicdebuginit(void) print_local_APIC(void *dummy)
+{
+       unsigned int v, ver, maxlvt;
+       u64 icr;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+               smp_processor_id(), hard_smp_processor_id());
+       v = apic_read(APIC_ID);
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
+       v = apic_read(APIC_LVR);
+       printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+       ver = GET_APIC_VERSION(v);
+       maxlvt = lapic_get_maxlvt();
+
+       v = apic_read(APIC_TASKPRI);
+       printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+               if (!APIC_XAPIC(ver)) {
+                       v = apic_read(APIC_ARBPRI);
+                       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                              v & APIC_ARBPRI_MASK);
+               }
+               v = apic_read(APIC_PROCPRI);
+               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       }
+
+       /*
+        * Remote read supported only in the 82489DX and local APIC for
+        * Pentium processors.
+        */
+       if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+               v = apic_read(APIC_RRR);
+               printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_LDR);
+       printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+       if (!x2apic_enabled()) {
+               v = apic_read(APIC_DFR);
+               printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       }
+       v = apic_read(APIC_SPIV);
+       printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+       printk(KERN_DEBUG "... APIC ISR field:\n");
+       print_APIC_bitfield(APIC_ISR);
+       printk(KERN_DEBUG "... APIC TMR field:\n");
+       print_APIC_bitfield(APIC_TMR);
+       printk(KERN_DEBUG "... APIC IRR field:\n");
+       print_APIC_bitfield(APIC_IRR);
+
+       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+
+               v = apic_read(APIC_ESR);
+               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       }
+
+       icr = apic_icr_read();
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+       v = apic_read(APIC_LVTT);
+       printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+       if (maxlvt > 3) {                       /* PC is LVT#4. */
+               v = apic_read(APIC_LVTPC);
+               printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+       }
+       v = apic_read(APIC_LVT0);
+       printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+       v = apic_read(APIC_LVT1);
+       printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+       if (maxlvt > 2) {                       /* ERR is LVT#3. */
+               v = apic_read(APIC_LVTERR);
+               printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_TMICT);
+       printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+       v = apic_read(APIC_TMCCT);
+       printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+       v = apic_read(APIC_TDCR);
+       printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+       printk("\n");
+}
+
+__apicdebuginit(void) print_all_local_APICs(void)
+{
+       int cpu;
+
+       preempt_disable();
+       for_each_online_cpu(cpu)
+               smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+       preempt_enable();
+}
+
+__apicdebuginit(void) print_PIC(void)
+{
+       unsigned int v;
+       unsigned long flags;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       v = inb(0xa1) << 8 | inb(0x21);
+       printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+       v = inb(0xa0) << 8 | inb(0x20);
+       printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
+       outb(0x0b, 0xa0);
+       outb(0x0b, 0x20);
+       v = inb(0xa0) << 8 | inb(0x20);
+       outb(0x0a, 0xa0);
+       outb(0x0a, 0x20);
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+
+       printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+       v = inb(0x4d1) << 8 | inb(0x4d0);
+       printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+__apicdebuginit(int) print_all_ICs(void)
+{
+       print_PIC();
+       print_all_local_APICs();
+       print_IO_APIC();
+
+       return 0;
+}
+
+fs_initcall(print_all_ICs);
+
+
+/* Where, if anywhere, is the i8259 connected in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+void __init enable_IO_APIC(void)
+{
+       union IO_APIC_reg_01 reg_01;
+       int i8259_apic, i8259_pin;
+       int apic;
+       unsigned long flags;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               int pin;
+               /* See if any of the pins is in ExtINT mode */
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+                       entry = ioapic_read_entry(apic, pin);
+
+                       /* If the interrupt line is enabled and in ExtInt mode,
+                        * we have found the pin where the i8259 is connected.
+                        */
+                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+                               ioapic_i8259.apic = apic;
+                               ioapic_i8259.pin  = pin;
+                               goto found_i8259;
+                       }
+               }
+       }
+ found_i8259:
+       /* Look to see if the MP table has reported the ExtINT */
+       /* If we could not find the appropriate pin by looking at the ioapic,
+        * the i8259 is probably not connected to the ioapic, but give the
+        * mptable a chance anyway.
+        */
+       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+       /* Trust the MP table if nothing is set up in the hardware */
+       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+               ioapic_i8259.pin  = i8259_pin;
+               ioapic_i8259.apic = i8259_apic;
+       }
+       /* Complain if the MP table and the hardware disagree */
+       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+       {
+               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+       }
+
+       /*
+        * Do not trust the IO-APIC being empty at bootup
+        */
+       clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+       /*
+        * Clear the IO-APIC before rebooting:
+        */
+       clear_IO_APIC();
+
+       /*
+        * If the i8259 is routed through an IOAPIC
+        * Put that IOAPIC in virtual wire mode
+        * so legacy interrupts can be delivered.
+        */
+       if (ioapic_i8259.pin != -1) {
+               struct IO_APIC_route_entry entry;
+
+               memset(&entry, 0, sizeof(entry));
+               entry.mask            = 0; /* Enabled */
+               entry.trigger         = 0; /* Edge */
+               entry.irr             = 0;
+               entry.polarity        = 0; /* High */
+               entry.delivery_status = 0;
+               entry.dest_mode       = 0; /* Physical */
+               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
+               entry.vector          = 0;
+               entry.dest            = read_apic_id();
+
+               /*
+                * Add it to the IO-APIC irq-routing table:
+                */
+               ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+       }
+
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+       union IO_APIC_reg_00 reg_00;
+       physid_mask_t phys_id_present_map;
+       int apic_id;
+       int i;
+       unsigned char old_id;
+       unsigned long flags;
+
+       if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+               return;
+
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
+       /*
+        * This is broken; anything with a real cpu count has to
+        * circumvent this idiocy regardless.
+        */
+       phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+
+       /*
+        * Set the IOAPIC ID to the value stored in the MPC table.
+        */
+       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
+
+               /* Read the register 0 value */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic_id, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               old_id = mp_ioapics[apic_id].apicid;
+
+               if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+                               apic_id, mp_ioapics[apic_id].apicid);
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               reg_00.bits.ID);
+                       mp_ioapics[apic_id].apicid = reg_00.bits.ID;
+               }
+
+               /*
+                * Sanity check, is the ID really free? Every APIC in a
+                * system must have a unique ID or we get lots of nice
+                * 'stuck on smp_invalidate_needed IPI wait' messages.
+                */
+               if (apic->check_apicid_used(phys_id_present_map,
+                                       mp_ioapics[apic_id].apicid)) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+                               apic_id, mp_ioapics[apic_id].apicid);
+                       for (i = 0; i < get_physical_broadcast(); i++)
+                               if (!physid_isset(i, phys_id_present_map))
+                                       break;
+                       if (i >= get_physical_broadcast())
+                               panic("Max APIC ID exceeded!\n");
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               i);
+                       physid_set(i, phys_id_present_map);
+                       mp_ioapics[apic_id].apicid = i;
+               } else {
+                       physid_mask_t tmp;
+                       tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
+                       apic_printk(APIC_VERBOSE, "Setting %d in the "
+                                       "phys_id_present_map\n",
+                                       mp_ioapics[apic_id].apicid);
+                       physids_or(phys_id_present_map, phys_id_present_map, tmp);
+               }
+
+
+               /*
+                * We need to adjust the IRQ routing table
+                * if the ID changed.
+                */
+               if (old_id != mp_ioapics[apic_id].apicid)
+                       for (i = 0; i < mp_irq_entries; i++)
+                               if (mp_irqs[i].dstapic == old_id)
+                                       mp_irqs[i].dstapic
+                                               = mp_ioapics[apic_id].apicid;
+
+               /*
+                * Read the right value from the MPC table and
+                * write it into the ID register.
+                */
+               apic_printk(APIC_VERBOSE, KERN_INFO
+                       "...changing IO-APIC physical APIC ID to %d ...",
+                       mp_ioapics[apic_id].apicid);
+
+               reg_00.bits.ID = mp_ioapics[apic_id].apicid;
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic_id, 0, reg_00.raw);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /*
+                * Sanity check
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic_id, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
+                       printk("could not set ID!\n");
+               else
+                       apic_printk(APIC_VERBOSE, " ok.\n");
+       }
+}
+#endif
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
+}
+__setup("no_timer_check", notimercheck);
+
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *     - timer IRQ defaults to IO-APIC IRQ
+ *     - if this function detects that timer IRQs are defunct, then we fall
+ *       back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+       unsigned long t1 = jiffies;
+       unsigned long flags;
+
+       if (no_timer_check)
+               return 1;
+
+       local_save_flags(flags);
+       local_irq_enable();
+       /* Let ten ticks pass... */
+       mdelay((10 * 1000) / HZ);
+       local_irq_restore(flags);
+
+       /*
+        * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after the first one or
+ * two ticks in a non-ExtINT mode.  Also the local APIC
+        * might have cached one ExtINT interrupt.  Finally, at
+        * least one tick may be lost due to delays.
+        */
+
+       /* jiffies wrap? */
+       if (time_after(jiffies, t1 + 4))
+               return 1;
+       return 0;
+}
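+
+/*
+ * Example: with HZ = 250, mdelay((10 * 1000) / HZ) busy-waits 40 ms,
+ * i.e. roughly ten timer ticks; the IRQ is judged working if jiffies
+ * advanced by more than 4, which tolerates a few lost or cached ticks
+ * as described above.
+ */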
+
+/*
+ * In the SMP+IOAPIC case it might happen that an unspecified number of
+ * pending IRQ events are left unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered interrupts need to resend any interrupt
+ * that was delayed, but this is now handled in the
+ * device-independent code.
+ */
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need to
+ * return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_ioapic_irq(unsigned int irq)
+{
+       int was_pending = 0;
+       unsigned long flags;
+       struct irq_cfg *cfg;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < NR_IRQS_LEGACY) {
+               disable_8259A_irq(irq);
+               if (i8259A_irq_pending(irq))
+                       was_pending = 1;
+       }
+       cfg = irq_cfg(irq);
+       __unmask_IO_APIC_irq(cfg);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return was_pending;
+}
+
+#ifdef CONFIG_X86_64
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+       struct irq_cfg *cfg = irq_cfg(irq);
+       unsigned long flags;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return 1;
+}
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+       apic->send_IPI_self(irq_cfg(irq)->vector);
+
+       return 1;
+}
+#endif
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update (of vector
+ * and cpu destination) of the IRTE, plus a flush of the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE as well, with the
+ * updated vector information, along with modifying the IRTE with vector and
+ * destination. So irq migration for level triggered is a little bit more
+ * complex compared to edge triggered migration. But the good news is, we use
+ * the same algorithm for level triggered migration as we have today, the
+ * only difference being that we now initiate the irq migration from process
+ * context instead of interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void
+migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+       struct irq_cfg *cfg;
+       struct irte irte;
+       int modify_ioapic_rte;
+       unsigned int dest;
+       unsigned long flags;
+       unsigned int irq;
+
+       if (!cpumask_intersects(mask, cpu_online_mask))
+               return;
+
+       irq = desc->irq;
+       if (get_irte(irq, &irte))
+               return;
+
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
+               return;
+
+       set_extra_move_desc(desc, mask);
+
+       dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+
+       modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       if (modify_ioapic_rte) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               __target_IO_APIC_irq(irq, dest, cfg);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * Modify the IRTE and flush the interrupt entry cache.
+        */
+       modify_irte(irq, &irte);
+
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
+
+       cpumask_copy(desc->affinity, mask);
+}
+
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
+{
+       int ret = -1;
+       struct irq_cfg *cfg = desc->chip_data;
+
+       mask_IO_APIC_irq_desc(desc);
+
+       if (io_apic_level_ack_pending(cfg)) {
+               /*
+                * Interrupt in progress. Migrating irq now will change the
+                * vector information in the IO-APIC RTE and that will confuse
+                * the EOI broadcast performed by cpu.
+                * So, delay the irq migration to the next instance.
+                */
+               schedule_delayed_work(&ir_migration_work, 1);
+               goto unmask;
+       }
+
+       /* everything is clear, we have right of way */
+       migrate_ioapic_irq_desc(desc, desc->pending_mask);
+
+       ret = 0;
+       desc->status &= ~IRQ_MOVE_PENDING;
+       cpumask_clear(desc->pending_mask);
+
+unmask:
+       unmask_IO_APIC_irq_desc(desc);
+
+       return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+       unsigned int irq;
+       struct irq_desc *desc;
+
+       for_each_irq_desc(irq, desc) {
+               if (desc->status & IRQ_MOVE_PENDING) {
+                       unsigned long flags;
+
+                       spin_lock_irqsave(&desc->lock, flags);
+                       if (!desc->chip->set_affinity ||
+                           !(desc->status & IRQ_MOVE_PENDING)) {
+                               desc->status &= ~IRQ_MOVE_PENDING;
+                               spin_unlock_irqrestore(&desc->lock, flags);
+                               continue;
+                       }
+
+                       desc->chip->set_affinity(irq, desc->pending_mask);
+                       spin_unlock_irqrestore(&desc->lock, flags);
+               }
+       }
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+                                           const struct cpumask *mask)
+{
+       if (desc->status & IRQ_LEVEL) {
+               desc->status |= IRQ_MOVE_PENDING;
+               cpumask_copy(desc->pending_mask, mask);
+               migrate_irq_remapped_level_desc(desc);
+               return;
+       }
+
+       migrate_ioapic_irq_desc(desc, mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+                                      const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       set_ir_ioapic_affinity_irq_desc(desc, mask);
+}
+#endif
+
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+       unsigned vector, me;
+
+       ack_APIC_irq();
+       exit_idle();
+       irq_enter();
+
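+       /*
+        * Walk this CPU's vector table and release any vectors
+        * left behind by an irq that has migrated away.
+        */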
+       me = smp_processor_id();
+       for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+               unsigned int irq;
+               struct irq_desc *desc;
+               struct irq_cfg *cfg;
+               irq = __get_cpu_var(vector_irq)[vector];
+
+               if (irq == -1)
+                       continue;
+
+               desc = irq_to_desc(irq);
+               if (!desc)
+                       continue;
+
+               cfg = irq_cfg(irq);
+               spin_lock(&desc->lock);
+               if (!cfg->move_cleanup_count)
+                       goto unlock;
+
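+               /*
+                * If this vector/CPU pair is the irq's current
+                * destination it is not a leftover, so leave it alone.
+                */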
+               if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+                       goto unlock;
+
+               __get_cpu_var(vector_irq)[vector] = -1;
+               cfg->move_cleanup_count--;
+unlock:
+               spin_unlock(&desc->lock);
+       }
+
+       irq_exit();
+}
+
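+/*
+ * Called from the irq ack path: once the irq has fired on a CPU inside
+ * its new domain, kick off the cleanup of the old vector.
+ */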
+static void irq_complete_move(struct irq_desc **descp)
+{
+       struct irq_desc *desc = *descp;
+       struct irq_cfg *cfg = desc->chip_data;
+       unsigned vector, me;
+
+       if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               if (likely(!cfg->move_desc_pending))
+                       return;
+
+               /* domain has not changed, but affinity did */
+               me = smp_processor_id();
+               if (cpumask_test_cpu(me, desc->affinity)) {
+                       *descp = desc = move_irq_desc(desc, me);
+                       /* get the new one */
+                       cfg = desc->chip_data;
+                       cfg->move_desc_pending = 0;
+               }
+#endif
+               return;
+       }
+
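+       /*
+        * The low-level entry code stores the complemented vector
+        * number in orig_ax; undo that to recover the vector this
+        * interrupt arrived on.
+        */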
+       vector = ~get_irq_regs()->orig_ax;
+       me = smp_processor_id();
+
+       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               *descp = desc = move_irq_desc(desc, me);
+               /* get the new one */
+               cfg = desc->chip_data;
+#endif
+               send_cleanup_vector(cfg);
+       }
+}
+#else
+static inline void irq_complete_move(struct irq_desc **descp) {}
+#endif
+
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+
+#endif
+
+static void ack_apic_edge(unsigned int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       irq_complete_move(&desc);
+       move_native_irq(irq);
+       ack_APIC_irq();
+}
+
+atomic_t irq_mis_count;
+
+static void ack_apic_level(unsigned int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+#ifdef CONFIG_X86_32
+       unsigned long v;
+       int i;
+#endif
+       struct irq_cfg *cfg;
+       int do_unmask_irq = 0;
+
+       irq_complete_move(&desc);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+       /* If we are moving the irq we need to mask it */
+       if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+               do_unmask_irq = 1;
+               mask_IO_APIC_irq_desc(desc);
+       }
+#endif
+
+#ifdef CONFIG_X86_32
+       /*
+       * It appears there is an erratum which affects at least version 0x11
+       * of I/O APIC (that's the 82093AA and cores integrated into various
+       * chipsets).  Under certain conditions a level-triggered interrupt is
+       * erroneously delivered as edge-triggered one but the respective IRR
+       * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+       * message but it will never arrive and further interrupts are blocked
+       * from the source.  The exact reason is so far unknown, but the
+       * phenomenon was observed when two consecutive interrupt requests
+       * from a given source get delivered to the same CPU and the source is
+       * temporarily disabled in between.
+       *
+       * A workaround is to simulate an EOI message manually.  We achieve it
+       * by setting the trigger mode to edge and then to level when the edge
+       * trigger mode gets detected in the TMR of a local APIC for a
+       * level-triggered interrupt.  We mask the source for the time of the
+       * operation to prevent an edge-triggered interrupt escaping meanwhile.
+       * The idea is from Manfred Spraul.  --macro
+       */
+       cfg = desc->chip_data;
+       i = cfg->vector;
+
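+       /*
+        * The TMR is 256 bits spread over eight 32-bit APIC registers
+        * at stride 0x10; this picks the word holding our vector's bit.
+        */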
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+#endif
+
+       /*
+        * We must acknowledge the irq before we move it or the acknowledge will
+        * not propagate properly.
+        */
+       ack_APIC_irq();
+
+       /* Now we can move and re-enable the irq */
+       if (unlikely(do_unmask_irq)) {
+               /* Only migrate the irq if the ack has been received.
+                *
+                * On rare occasions the broadcast level triggered ack gets
+                * delayed going to ioapics, and if we reprogram the
+                * vector while Remote IRR is still set the irq will never
+                * fire again.
+                *
+                * To prevent this scenario we read the Remote IRR bit
+                * of the ioapic.  This has two effects.
+                * - On any sane system the read of the ioapic will
+                *   flush writes (and acks) going to the ioapic from
+                *   this cpu.
+                * - We get to see if the ACK has actually been delivered.
+                *
+                * Based on failed experiments of reprogramming the
+                * ioapic entry from outside of irq context starting
+                * with masking the ioapic entry and then polling until
+                * Remote IRR was clear before reprogramming the
+                * ioapic I don't trust the Remote IRR bit to be
+                * completely accurate.
+                *
+                * However there appears to be no other way to plug
+                * this race, so if the Remote IRR bit is not
+                * accurate and is causing problems then it is a hardware bug
+                * and you can go talk to the chipset vendor about it.
+                */
+               cfg = desc->chip_data;
+               if (!io_apic_level_ack_pending(cfg))
+                       move_masked_irq(irq);
+               unmask_IO_APIC_irq_desc(desc);
+       }
+
+#ifdef CONFIG_X86_32
+       if (!(v & (1 << (i & 0x1f)))) {
+               atomic_inc(&irq_mis_count);
+               spin_lock(&ioapic_lock);
+               __mask_and_edge_IO_APIC_irq(cfg);
+               __unmask_and_level_IO_APIC_irq(cfg);
+               spin_unlock(&ioapic_lock);
+       }
+#endif
+}
+
+static struct irq_chip ioapic_chip __read_mostly = {
+       .name           = "IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_apic_edge,
+       .eoi            = ack_apic_level,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ioapic_affinity_irq,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ir_ioapic_affinity_irq,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+#endif
+
+static inline void init_IO_APIC_traps(void)
+{
+       int irq;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+
+       /*
+        * NOTE! The local APIC isn't very good at handling
+        * multiple interrupts at the same interrupt level.
+        * As the interrupt level is determined by taking the
+        * vector number and shifting that right by 4, we
+        * want to spread these out a bit so that they don't
+        * all fall in the same interrupt level.
+        *
+        * Also, we've got to be careful not to trash gate
+        * 0x80, because int 0x80 is hm, kind of importantish. ;)
+        */
+       for_each_irq_desc(irq, desc) {
+               cfg = desc->chip_data;
+               if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
+                       /*
+                        * Hmm.. We don't have an entry for this,
+                        * so default to an old-fashioned 8259
+                        * interrupt if we can..
+                        */
+                       if (irq < NR_IRQS_LEGACY)
+                               make_8259A_irq(irq);
+                       else
+                               /* Strange. Oh, well.. */
+                               desc->chip = &no_irq_chip;
+               }
+       }
+}
+
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void unmask_lapic_irq(unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq(unsigned int irq)
+{
+       ack_APIC_irq();
+}
+
+static struct irq_chip lapic_chip __read_mostly = {
+       .name           = "local-APIC",
+       .mask           = mask_lapic_irq,
+       .unmask         = unmask_lapic_irq,
+       .ack            = ack_lapic_irq,
+};
+
+static void lapic_register_intr(int irq, struct irq_desc *desc)
+{
+       desc->status &= ~IRQ_LEVEL;
+       set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+                                     "edge");
+}
+
+static void __init setup_nmi(void)
+{
+       /*
+        * Dirty trick to enable the NMI watchdog ...
+        * We put the 8259A master into AEOI mode and
+        * unmask on all local APICs LVT0 as NMI.
+        *
+        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+        * is from Maciej W. Rozycki - so we do not have to EOI from
+        * the NMI handler or the timer interrupt.
+        */
+       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+       enable_NMI_through_LVT0();
+
+       apic_printk(APIC_VERBOSE, " done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void __init unlock_ExtINT_logic(void)
+{
+       int apic, pin, i;
+       struct IO_APIC_route_entry entry0, entry1;
+       unsigned char save_control, save_freq_select;
+
+       pin  = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+       apic = find_isa_irq_apic(8, mp_INT);
+       if (apic == -1) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       entry0 = ioapic_read_entry(apic, pin);
+       clear_IO_APIC_pin(apic, pin);
+
+       memset(&entry1, 0, sizeof(entry1));
+
+       entry1.dest_mode = 0;                   /* physical delivery */
+       entry1.mask = 0;                        /* unmask IRQ now */
+       entry1.dest = hard_smp_processor_id();
+       entry1.delivery_mode = dest_ExtINT;
+       entry1.polarity = entry0.polarity;
+       entry1.trigger = 0;
+       entry1.vector = 0;
+
+       ioapic_write_entry(apic, pin, entry1);
+
+       save_control = CMOS_READ(RTC_CONTROL);
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
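+       /*
+        * Program the RTC for a 1024 Hz periodic interrupt
+        * (rate select 6) and enable it.
+        */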
+       CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+                  RTC_FREQ_SELECT);
+       CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
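+       /*
+        * Poll for up to a second; every periodic-interrupt flag we
+        * see knocks an extra 100ms off the wait.
+        */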
+       i = 100;
+       while (i-- > 0) {
+               mdelay(10);
+               if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+                       i -= 10;
+       }
+
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       clear_IO_APIC_pin(apic, pin);
+
+       ioapic_write_entry(apic, pin, entry0);
+}
+
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+       disable_timer_pin_1 = 1;
+       return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for all platforms.
+ */
+static inline void __init check_timer(void)
+{
+       struct irq_desc *desc = irq_to_desc(0);
+       struct irq_cfg *cfg = desc->chip_data;
+       int cpu = boot_cpu_id;
+       int apic1, pin1, apic2, pin2;
+       unsigned long flags;
+       int no_pin1 = 0;
+
+       local_irq_save(flags);
+
+       /*
+        * get/set the timer IRQ vector:
+        */
+       disable_8259A_irq(0);
+       assign_irq_vector(0, cfg, apic->target_cpus());
+
+       /*
+        * As IRQ0 is to be enabled in the 8259A, the virtual
+        * wire has to be disabled in the local APIC.  Also
+        * timer interrupts need to be acknowledged manually in
+        * the 8259A for the i82489DX when using the NMI
+        * watchdog as that APIC treats NMIs as level-triggered.
+        * The AEOI mode will finish them in the 8259A
+        * automatically.
+        */
+       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       init_8259A(1);
+#ifdef CONFIG_X86_32
+       {
+               unsigned int ver;
+
+               ver = apic_read(APIC_LVR);
+               ver = GET_APIC_VERSION(ver);
+               timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+       }
+#endif
+
+       pin1  = find_isa_irq_pin(0, mp_INT);
+       apic1 = find_isa_irq_apic(0, mp_INT);
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
+
+       apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
+                   "apic1=%d pin1=%d apic2=%d pin2=%d\n",
+                   cfg->vector, apic1, pin1, apic2, pin2);
+
+       /*
+        * Some BIOS writers are clueless and report the ExtINTA
+        * I/O APIC input from the cascaded 8259A as the timer
+        * interrupt input.  So just in case, if only one pin
+        * was found above, try it both directly and through the
+        * 8259A.
+        */
+       if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
+               if (intr_remapping_enabled)
+                       panic("BIOS bug: timer not connected to IO-APIC");
+#endif
+               pin1 = pin2;
+               apic1 = apic2;
+               no_pin1 = 1;
+       } else if (pin2 == -1) {
+               pin2 = pin1;
+               apic2 = apic1;
+       }
+
+       if (pin1 != -1) {
+               /*
+                * Ok, does IRQ0 through the IOAPIC work?
+                */
+               if (no_pin1) {
+                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+                       setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+               } else {
+                       /*
+                        * For edge trigger, setup_IO_APIC_irq already
+                        * leaves it unmasked, so we only need to unmask
+                        * it if it is level-triggered.
+                        * Do we really have a level-triggered timer?
+                        */
+                       int idx;
+                       idx = find_irq_entry(apic1, pin1, mp_INT);
+                       if (idx != -1 && irq_trigger(idx))
+                               unmask_IO_APIC_irq_desc(desc);
+               }
+               if (timer_irq_works()) {
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               setup_nmi();
+                               enable_8259A_irq(0);
+                       }
+                       if (disable_timer_pin_1 > 0)
+                               clear_IO_APIC_pin(0, pin1);
+                       goto out;
+               }
+#ifdef CONFIG_INTR_REMAP
+               if (intr_remapping_enabled)
+                       panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
+               local_irq_disable();
+               clear_IO_APIC_pin(apic1, pin1);
+               if (!no_pin1)
+                       apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+                                   "8254 timer not connected to IO-APIC\n");
+
+               apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+                           "(IRQ0) through the 8259A ...\n");
+               apic_printk(APIC_QUIET, KERN_INFO
+                           "..... (found apic %d pin %d) ...\n", apic2, pin2);
+               /*
+                * legacy devices should be connected to IO APIC #0
+                */
+               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+               setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+               enable_8259A_irq(0);
+               if (timer_irq_works()) {
+                       apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
+                       timer_through_8259 = 1;
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               disable_8259A_irq(0);
+                               setup_nmi();
+                               enable_8259A_irq(0);
+                       }
+                       goto out;
+               }
+               /*
+                * Cleanup, just in case ...
+                */
+               local_irq_disable();
+               disable_8259A_irq(0);
+               clear_IO_APIC_pin(apic2, pin2);
+               apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
+       }
+
+       if (nmi_watchdog == NMI_IO_APIC) {
+               apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
+                           "through the IO-APIC - disabling NMI Watchdog!\n");
+               nmi_watchdog = NMI_NONE;
+       }
+#ifdef CONFIG_X86_32
+       timer_ack = 0;
+#endif
+
+       apic_printk(APIC_QUIET, KERN_INFO
+                   "...trying to set up timer as Virtual Wire IRQ...\n");
+
+       lapic_register_intr(0, desc);
+       apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
+       enable_8259A_irq(0);
+
+       if (timer_irq_works()) {
+               apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+               goto out;
+       }
+       local_irq_disable();
+       disable_8259A_irq(0);
+       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
+       apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
+
+       apic_printk(APIC_QUIET, KERN_INFO
+                   "...trying to set up timer as ExtINT IRQ...\n");
+
+       init_8259A(0);
+       make_8259A_irq(0);
+       apic_write(APIC_LVT0, APIC_DM_EXTINT);
+
+       unlock_ExtINT_logic();
+
+       if (timer_irq_works()) {
+               apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+               goto out;
+       }
+       local_irq_disable();
+       apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+       panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
+               "report.  Then try booting with the 'noapic' option.\n");
+out:
+       local_irq_restore(flags);
+}
+
+/*
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices.  However there may be an I/O APIC pin available for
+ * this interrupt regardless.  The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A.  In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table.  With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default.  We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now.  No actual device should request
+ * it anyway.  --macro
+ */
+#define PIC_IRQS       (1 << PIC_CASCADE_IR)
+
+void __init setup_IO_APIC(void)
+{
+       /*
+        * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+        */
+
+       io_apic_irqs = ~PIC_IRQS;
+
+       apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+       /*
+        * Set up IO-APIC IRQ routing.
+        */
+#ifdef CONFIG_X86_32
+       if (!acpi_ioapic)
+               setup_ioapic_ids_from_mpc();
+#endif
+       sync_Arb_IDs();
+       setup_IO_APIC_irqs();
+       init_IO_APIC_traps();
+       check_timer();
+}
+
+/*
+ *      Called after all the initialization is done. If we didn't find any
+ *      APIC bugs then we can allow the modify fast path.
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+       if (sis_apic_bug == -1)
+               sis_apic_bug = 0;
+       return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+struct sysfs_ioapic_data {
+       struct sys_device dev;
+       struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       int i;
+
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++)
+               *entry = ioapic_read_entry(dev->id, i);
+
+       return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       unsigned long flags;
+       union IO_APIC_reg_00 reg_00;
+       int i;
+
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(dev->id, 0);
+       if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].apicid;
+               io_apic_write(dev->id, 0, reg_00.raw);
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
+               ioapic_write_entry(dev->id, i, entry[i]);
+
+       return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+       .name = "ioapic",
+       .suspend = ioapic_suspend,
+       .resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+       struct sys_device *dev;
+       int i, size, error;
+
+       error = sysdev_class_register(&ioapic_sysdev_class);
+       if (error)
+               return error;
+
+       for (i = 0; i < nr_ioapics; i++) {
+               size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+                       * sizeof(struct IO_APIC_route_entry);
+               mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
+               if (!mp_ioapic_data[i]) {
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+               dev = &mp_ioapic_data[i]->dev;
+               dev->id = i;
+               dev->cls = &ioapic_sysdev_class;
+               error = sysdev_register(dev);
+               if (error) {
+                       kfree(mp_ioapic_data[i]);
+                       mp_ioapic_data[i] = NULL;
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+       }
+
+       return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
+/*
+ * Dynamic irq allocate and deallocation
+ */
+unsigned int create_irq_nr(unsigned int irq_want)
+{
+       /* Allocate an unused irq */
+       unsigned int irq;
+       unsigned int new;
+       unsigned long flags;
+       struct irq_cfg *cfg_new = NULL;
+       int cpu = boot_cpu_id;
+       struct irq_desc *desc_new = NULL;
+
+       irq = 0;
+       if (irq_want < nr_irqs_gsi)
+               irq_want = nr_irqs_gsi;
+
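+       /*
+        * Scan upwards from the end of the GSI range so that
+        * dynamically created irqs never collide with IO-APIC pins.
+        */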
+       spin_lock_irqsave(&vector_lock, flags);
+       for (new = irq_want; new < nr_irqs; new++) {
+               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               if (!desc_new) {
+                       printk(KERN_INFO "can not get irq_desc for %d\n", new);
+                       continue;
+               }
+               cfg_new = desc_new->chip_data;
+
+               if (cfg_new->vector != 0)
+                       continue;
+               if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
+                       irq = new;
+               break;
+       }
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       if (irq > 0) {
+               dynamic_irq_init(irq);
+               /* restore it, in case dynamic_irq_init cleared it */
+               if (desc_new)
+                       desc_new->chip_data = cfg_new;
+       }
+       return irq;
+}
+
+int create_irq(void)
+{
+       unsigned int irq_want;
+       int irq;
+
+       irq_want = nr_irqs_gsi;
+       irq = create_irq_nr(irq_want);
+
+       if (irq == 0)
+               irq = -1;
+
+       return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+       unsigned long flags;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+
+       /* store it, in case dynamic_irq_cleanup clears it */
+       desc = irq_to_desc(irq);
+       cfg = desc->chip_data;
+       dynamic_irq_cleanup(irq);
+       /* connect back irq_cfg */
+       if (desc)
+               desc->chip_data = cfg;
+
+#ifdef CONFIG_INTR_REMAP
+       free_irte(irq);
+#endif
+       spin_lock_irqsave(&vector_lock, flags);
+       __clear_irq_vector(irq, cfg);
+       spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+       struct irq_cfg *cfg;
+       int err;
+       unsigned dest;
+
+       if (disable_apic)
+               return -ENXIO;
+
+       cfg = irq_cfg(irq);
+       err = assign_irq_vector(irq, cfg, apic->target_cpus());
+       if (err)
+               return err;
+
+       dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irte irte;
+               int ir_index;
+               u16 sub_handle;
+
+               ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+               BUG_ON(ir_index == -1);
+
+               memset (&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = apic->irq_dest_mode;
+               irte.trigger_mode = 0; /* edge */
+               irte.dlvry_mode = apic->irq_delivery_mode;
+               irte.vector = cfg->vector;
+               irte.dest_id = IRTE_DEST(dest);
+
+               modify_irte(irq, &irte);
+
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->data = sub_handle;
+               msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+                                 MSI_ADDR_IR_SHV |
+                                 MSI_ADDR_IR_INDEX1(ir_index) |
+                                 MSI_ADDR_IR_INDEX2(ir_index);
+       } else
+#endif
+       {
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->address_lo =
+                       MSI_ADDR_BASE_LO |
+                       ((apic->irq_dest_mode == 0) ?
+                               MSI_ADDR_DEST_MODE_PHYSICAL:
+                               MSI_ADDR_DEST_MODE_LOGICAL) |
+                       ((apic->irq_delivery_mode != dest_LowestPrio) ?
+                               MSI_ADDR_REDIRECTION_CPU:
+                               MSI_ADDR_REDIRECTION_LOWPRI) |
+                       MSI_ADDR_DEST_ID(dest);
+
+               msg->data =
+                       MSI_DATA_TRIGGER_EDGE |
+                       MSI_DATA_LEVEL_ASSERT |
+                       ((apic->irq_delivery_mode != dest_LowestPrio) ?
+                               MSI_DATA_DELIVERY_FIXED:
+                               MSI_DATA_DELIVERY_LOWPRI) |
+                       MSI_DATA_VECTOR(cfg->vector);
+       }
+       return err;
+}
+
+#ifdef CONFIG_SMP
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg;
+       struct msi_msg msg;
+       unsigned int dest;
+
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
+               return;
+
+       cfg = desc->chip_data;
+
+       read_msi_msg_desc(desc, &msg);
+
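+       /*
+        * Patch only the vector and destination-ID fields,
+        * leaving the rest of the composed message intact.
+        */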
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(cfg->vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       write_msi_msg_desc(desc, &msg);
+}
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void
+ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
+       unsigned int dest;
+       struct irte irte;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
+               return;
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * atomically update the IRTE with the new destination and vector.
+        */
+       modify_irte(irq, &irte);
+
+       /*
+        * After this point, all the interrupts will start arriving
+        * at the new destination. So, time to cleanup the previous
+        * vector allocation.
+        */
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
+}
+
+#endif
+#endif /* CONFIG_SMP */
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+       .name           = "PCI-MSI",
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_msi_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+       .name           = "IR-PCI-MSI",
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_x2apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity   = ir_set_msi_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+       struct intel_iommu *iommu;
+       int index;
+
+       iommu = map_dev_to_ir(dev);
+       if (!iommu) {
+               printk(KERN_ERR
+                      "Unable to map PCI %s to iommu\n", pci_name(dev));
+               return -ENOENT;
+       }
+
+       index = alloc_irte(iommu, irq, nvec);
+       if (index < 0) {
+               printk(KERN_ERR
+                      "Unable to allocate %d IRTE for PCI %s\n", nvec,
+                      pci_name(dev));
+               return -ENOSPC;
+       }
+       return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+{
+       int ret;
+       struct msi_msg msg;
+
+       ret = msi_compose_msg(dev, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       set_irq_msi(irq, msidesc);
+       write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irq_desc *desc = irq_to_desc(irq);
+               /*
+                * irq migration in process context
+                */
+               desc->status |= IRQ_MOVE_PCNTXT;
+               set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+       } else
+#endif
+               set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+       dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+       return 0;
+}
+
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+       unsigned int irq;
+       int ret, sub_handle;
+       struct msi_desc *msidesc;
+       unsigned int irq_want;
+
+#ifdef CONFIG_INTR_REMAP
+       struct intel_iommu *iommu = NULL;
+       int index = 0;
+#endif
+
+       irq_want = nr_irqs_gsi;
+       sub_handle = 0;
+       list_for_each_entry(msidesc, &dev->msi_list, list) {
+               irq = create_irq_nr(irq_want);
+               if (irq == 0)
+                       return -1;
+               irq_want = irq + 1;
+#ifdef CONFIG_INTR_REMAP
+               if (!intr_remapping_enabled)
+                       goto no_ir;
+
+               if (!sub_handle) {
+                       /*
+                        * allocate the consecutive block of IRTEs
+                        * for 'nvec'
+                        */
+                       index = msi_alloc_irte(dev, irq, nvec);
+                       if (index < 0) {
+                               ret = index;
+                               goto error;
+                       }
+               } else {
+                       iommu = map_dev_to_ir(dev);
+                       if (!iommu) {
+                               ret = -ENOENT;
+                               goto error;
+                       }
+                       /*
+                        * set up the mapping between the irq and the IRTE
+                        * base index, with the sub_handle pointing to the
+                        * appropriate interrupt remap table entry.
+                        */
+                       set_irte_irq(irq, iommu, index, sub_handle);
+               }
+no_ir:
+#endif
+               ret = setup_msi_irq(dev, msidesc, irq);
+               if (ret < 0)
+                       goto error;
+               sub_handle++;
+       }
+       return 0;
+
+error:
+       destroy_irq(irq);
+       return ret;
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+       destroy_irq(irq);
+}
+
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg;
+       struct msi_msg msg;
+       unsigned int dest;
+
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
+               return;
+
+       cfg = desc->chip_data;
+
+       dmar_msi_read(irq, &msg);
+
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(cfg->vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       dmar_msi_write(irq, &msg);
+}
+
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+       .name = "DMAR_MSI",
+       .unmask = dmar_msi_unmask,
+       .mask = dmar_msi_mask,
+       .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity = dmar_msi_set_affinity,
+#endif
+       .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+       int ret;
+       struct msi_msg msg;
+
+       ret = msi_compose_msg(NULL, irq, &msg);
+       if (ret < 0)
+               return ret;
+       dmar_msi_write(irq, &msg);
+       set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+               "edge");
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg;
+       struct msi_msg msg;
+       unsigned int dest;
+
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
+               return;
+
+       cfg = desc->chip_data;
+
+       hpet_msi_read(irq, &msg);
+
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(cfg->vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       hpet_msi_write(irq, &msg);
+}
+
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+       .name = "HPET_MSI",
+       .unmask = hpet_msi_unmask,
+       .mask = hpet_msi_mask,
+       .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity = hpet_msi_set_affinity,
+#endif
+       .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+       int ret;
+       struct msi_msg msg;
+
+       ret = msi_compose_msg(NULL, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       hpet_msi_write(irq, &msg);
+       set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+               "edge");
+
+       return 0;
+}
+#endif
+
+#endif /* CONFIG_PCI_MSI */
+/*
+ * Hypertransport interrupt support
+ */
+#ifdef CONFIG_HT_IRQ
+
+#ifdef CONFIG_SMP
+
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+       struct ht_irq_msg msg;
+       fetch_ht_irq_msg(irq, &msg);
+
+       msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+       msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+       msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+       msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+       write_ht_irq_msg(irq, &msg);
+}
+
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg;
+       unsigned int dest;
+
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
+               return;
+
+       cfg = desc->chip_data;
+
+       target_ht_irq(irq, dest, cfg->vector);
+}
+
+#endif
+
+static struct irq_chip ht_irq_chip = {
+       .name           = "PCI-HT",
+       .mask           = mask_ht_irq,
+       .unmask         = unmask_ht_irq,
+       .ack            = ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ht_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+       struct irq_cfg *cfg;
+       int err;
+
+       if (disable_apic)
+               return -ENXIO;
+
+       cfg = irq_cfg(irq);
+       err = assign_irq_vector(irq, cfg, apic->target_cpus());
+       if (!err) {
+               struct ht_irq_msg msg;
+               unsigned dest;
+
+               dest = apic->cpu_mask_to_apicid_and(cfg->domain,
+                                                   apic->target_cpus());
+
+               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+               msg.address_lo =
+                       HT_IRQ_LOW_BASE |
+                       HT_IRQ_LOW_DEST_ID(dest) |
+                       HT_IRQ_LOW_VECTOR(cfg->vector) |
+                       ((apic->irq_dest_mode == 0) ?
+                               HT_IRQ_LOW_DM_PHYSICAL :
+                               HT_IRQ_LOW_DM_LOGICAL) |
+                       HT_IRQ_LOW_RQEOI_EDGE |
+                       ((apic->irq_delivery_mode != dest_LowestPrio) ?
+                               HT_IRQ_LOW_MT_FIXED :
+                               HT_IRQ_LOW_MT_ARBITRATED) |
+                       HT_IRQ_LOW_IRQ_MASKED;
+
+               write_ht_irq_msg(irq, &msg);
+
+               set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+                                             handle_edge_irq, "edge");
+
+               dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
+       }
+       return err;
+}
+#endif /* CONFIG_HT_IRQ */
+
+#ifdef CONFIG_X86_UV
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+                      unsigned long mmr_offset)
+{
+       const struct cpumask *eligible_cpu = cpumask_of(cpu);
+       struct irq_cfg *cfg;
+       int mmr_pnode;
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       unsigned long flags;
+       int err;
+
+       cfg = irq_cfg(irq);
+
+       err = assign_irq_vector(irq, cfg, eligible_cpu);
+       if (err != 0)
+               return err;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+                                     irq_name);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
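+       /*
+        * Build the route entry in a local unsigned long so it can be
+        * written out with a single 64-bit MMR store.
+        */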
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->vector = cfg->vector;
+       entry->delivery_mode = apic->irq_delivery_mode;
+       entry->dest_mode = apic->irq_dest_mode;
+       entry->polarity = 0;
+       entry->trigger = 0;
+       entry->mask = 0;
+       entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+       return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * no longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       int mmr_pnode;
+
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->mask = 1;
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif /* CONFIG_X86_UV */
+
+int __init io_apic_get_redir_entries(int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.entries;
+}
+
+void __init probe_nr_irqs_gsi(void)
+{
+       int nr = 0;
+
+       nr = acpi_probe_gsi();
+       if (nr > nr_irqs_gsi) {
+               nr_irqs_gsi = nr;
+       } else {
+               /* for acpi=off or acpi is not compiled in */
+               int idx;
+
+               nr = 0;
+               for (idx = 0; idx < nr_ioapics; idx++)
+                       nr += io_apic_get_redir_entries(idx) + 1;
+
+               if (nr > nr_irqs_gsi)
+                       nr_irqs_gsi = nr;
+       }
+
+       printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
+}
+
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+       int nr;
+
+       if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+               nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+       nr = nr_irqs_gsi + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+       /*
+        * for MSI and HT dyn irq
+        */
+       nr += nr_irqs_gsi * 16;
+#endif
+       if (nr < nr_irqs)
+               nr_irqs = nr;
+
+       return 0;
+}
+#endif
+
+/* --------------------------------------------------------------------------
+                          ACPI-based IOAPIC Configuration
+   -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI
+
+#ifdef CONFIG_X86_32
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+       union IO_APIC_reg_00 reg_00;
+       static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+       physid_mask_t tmp;
+       unsigned long flags;
+       int i = 0;
+
+       /*
+        * The P4 platform supports up to 256 APIC IDs on two separate APIC
+        * buses (one for LAPICs, one for IOAPICs), where predecessors only
+        * support up to 16 on one shared APIC bus.
+        *
+        * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+        *      advantage of new APIC bus architecture.
+        */
+
+       if (physids_empty(apic_id_map))
+               apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(ioapic, 0);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       if (apic_id >= get_physical_broadcast()) {
+               printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+                       "%d\n", ioapic, apic_id, reg_00.bits.ID);
+               apic_id = reg_00.bits.ID;
+       }
+
+       /*
+        * Every APIC in a system must have a unique ID or we get lots of nice
+        * 'stuck on smp_invalidate_needed IPI wait' messages.
+        */
+       if (apic->check_apicid_used(apic_id_map, apic_id)) {
+
+               for (i = 0; i < get_physical_broadcast(); i++) {
+                       if (!apic->check_apicid_used(apic_id_map, i))
+                               break;
+               }
+
+               if (i == get_physical_broadcast())
+                       panic("Max apic_id exceeded!\n");
+
+               printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+                       "trying %d\n", ioapic, apic_id, i);
+
+               apic_id = i;
+       }
+
+       tmp = apic->apicid_to_cpu_present(apic_id);
+       physids_or(apic_id_map, apic_id_map, tmp);
+
+       if (reg_00.bits.ID != apic_id) {
+               reg_00.bits.ID = apic_id;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(ioapic, 0, reg_00.raw);
+               reg_00.raw = io_apic_read(ioapic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /* Sanity check */
+               if (reg_00.bits.ID != apic_id) {
+                       printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+                       return -1;
+               }
+       }
+
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+       return apic_id;
+}
+
+int __init io_apic_get_version(int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.version;
+}
+#endif
+
+int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
+{
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
+
+       if (!IO_APIC_IRQ(irq)) {
+               apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+                       ioapic);
+               return -EINVAL;
+       }
+
+       desc = irq_to_desc_alloc_cpu(irq, cpu);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc %d\n", irq);
+               return 0;
+       }
+
+       /*
+        * IRQs < 16 are already in the irq_2_pin[] map
+        */
+       if (irq >= NR_IRQS_LEGACY) {
+               cfg = desc->chip_data;
+               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+       }
+
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
+
+       return 0;
+}
+
+
+int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+{
+       int i;
+
+       if (skip_ioapic_setup)
+               return -1;
+
+       for (i = 0; i < mp_irq_entries; i++)
+               if (mp_irqs[i].irqtype == mp_INT &&
+                   mp_irqs[i].srcbusirq == bus_irq)
+                       break;
+       if (i >= mp_irq_entries)
+               return -1;
+
+       *trigger = irq_trigger(i);
+       *polarity = irq_polarity(i);
+       return 0;
+}
+
+#endif /* CONFIG_ACPI */
+
+/*
+ * This function is currently only a helper for the i386 smp boot process,
+ * where we need to reprogram the ioredtbls to cater for the cpus which
+ * have come online, so the mask in all cases should simply be
+ * apic->target_cpus().
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
+{
+       int pin, ioapic, irq, irq_entry;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       const struct cpumask *mask;
+
+       if (skip_ioapic_setup == 1)
+               return;
+
+       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+                       if (irq_entry == -1)
+                               continue;
+                       irq = pin_2_irq(irq_entry, ioapic, pin);
+
+                       /*
+                        * setup_IO_APIC_irqs() could fail to get a vector
+                        * for some devices when there are too many of them,
+                        * because at that point only the boot CPU is online.
+                        */
+                       desc = irq_to_desc(irq);
+                       cfg = desc->chip_data;
+                       if (!cfg->vector) {
+                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
+                                                 irq_trigger(irq_entry),
+                                                 irq_polarity(irq_entry));
+                               continue;
+                       }
+
+                       /*
+                        * Honour affinities which have been set in early boot
+                        */
+                       if (desc->status &
+                           (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+                               mask = desc->affinity;
+                       else
+                               mask = apic->target_cpus();
+
+#ifdef CONFIG_INTR_REMAP
+                       if (intr_remapping_enabled)
+                               set_ir_ioapic_affinity_irq_desc(desc, mask);
+                       else
+#endif
+                               set_ioapic_affinity_irq_desc(desc, mask);
+               }
+
+       }
+}
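+
+/*
+ * Two cases per pin above: entries that never got a vector (too many
+ * devices while only the boot CPU was online) are set up from scratch,
+ * while already-programmed entries just get their destination rewritten,
+ * preserving any affinity pinned early via IRQ_NO_BALANCING or
+ * IRQ_AFFINITY_SET.
+ */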
+#endif
+
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(void)
+{
+       unsigned long n;
+       struct resource *res;
+       char *mem;
+       int i;
+
+       if (nr_ioapics <= 0)
+               return NULL;
+
+       n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+       n *= nr_ioapics;
+
+       mem = alloc_bootmem(n);
+       res = (void *)mem;
+
+       if (mem != NULL) {
+               mem += sizeof(struct resource) * nr_ioapics;
+
+               for (i = 0; i < nr_ioapics; i++) {
+                       res[i].name = mem;
+                       res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+                       sprintf(mem, "IOAPIC %u", i);
+                       mem += IOAPIC_RESOURCE_NAME_SIZE;
+               }
+       }
+
+       ioapic_resources = res;
+
+       return res;
+}
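+
+/*
+ * Layout of the single bootmem allocation above:
+ *
+ *   [struct resource 0]...[struct resource N-1][name 0]...[name N-1]
+ *
+ * Each res[i].name points at its own IOAPIC_RESOURCE_NAME_SIZE slot in
+ * the tail, so the resources and their names live (and would be freed)
+ * as one block.
+ */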
+
+void __init ioapic_init_mappings(void)
+{
+       unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+       struct resource *ioapic_res;
+       int i;
+
+       ioapic_res = ioapic_setup_resources();
+       for (i = 0; i < nr_ioapics; i++) {
+               if (smp_found_config) {
+                       ioapic_phys = mp_ioapics[i].apicaddr;
+#ifdef CONFIG_X86_32
+                       if (!ioapic_phys) {
+                               printk(KERN_ERR
+                                      "WARNING: bogus zero IO-APIC "
+                                      "address found in MPTABLE, "
+                                      "disabling IO/APIC support!\n");
+                               smp_found_config = 0;
+                               skip_ioapic_setup = 1;
+                               goto fake_ioapic_page;
+                       }
+#endif
+               } else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
+                       ioapic_phys = (unsigned long)
+                               alloc_bootmem_pages(PAGE_SIZE);
+                       ioapic_phys = __pa(ioapic_phys);
+               }
+               set_fixmap_nocache(idx, ioapic_phys);
+               apic_printk(APIC_VERBOSE,
+                           "mapped IOAPIC to %08lx (%08lx)\n",
+                           __fix_to_virt(idx), ioapic_phys);
+               idx++;
+
+               if (ioapic_res != NULL) {
+                       ioapic_res->start = ioapic_phys;
+                       ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+                       ioapic_res++;
+               }
+       }
+}
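+
+/*
+ * Note that even when no usable IOAPIC address is known, the loop above
+ * still maps a freshly allocated zero page at the fixmap slot, so later
+ * io_apic_read()/io_apic_write() callers dereference harmless memory
+ * instead of faulting.
+ */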
+
+static int __init ioapic_insert_resources(void)
+{
+       int i;
+       struct resource *r = ioapic_resources;
+
+       if (!r) {
+               printk(KERN_ERR
+                      "IO APIC resources could not be allocated.\n");
+               return -1;
+       }
+
+       for (i = 0; i < nr_ioapics; i++) {
+               insert_resource(&iomem_resource, r);
+               r++;
+       }
+
+       return 0;
+}
+
+/*
+ * Insert the IO APIC resources after PCI initialization has occurred to
+ * handle IO APICs that are mapped in on a BAR in PCI space.
+ */
+late_initcall(ioapic_insert_resources);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
new file mode 100644 (file)
index 0000000..dbf5445
--- /dev/null
@@ -0,0 +1,164 @@
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/apic.h>
+#include <asm/proto.h>
+#include <asm/ipi.h>
+
+void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
+{
+       unsigned long query_cpu;
+       unsigned long flags;
+
+       /*
+        * Hack. The clustered APIC addressing mode doesn't allow us to send
+        * to an arbitrary mask, so I do a unicast to each CPU instead.
+        * - mbligh
+        */
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+                               query_cpu), vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+}
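+
+/*
+ * __default_send_IPI_dest_field() writes the destination APIC ID and the
+ * vector as two separate ICR accesses; doing the whole walk under
+ * local_irq_save() keeps that two-step sequence from being interleaved
+ * with another IPI sent from an interrupt on this CPU.
+ */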
+
+void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
+                                                int vector)
+{
+       unsigned int this_cpu = smp_processor_id();
+       unsigned int query_cpu;
+       unsigned long flags;
+
+       /* See Hack comment above */
+
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               if (query_cpu == this_cpu)
+                       continue;
+               __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+                                query_cpu), vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+                                                int vector)
+{
+       unsigned long flags;
+       unsigned int query_cpu;
+
+       /*
+        * Hack. The clustered APIC addressing mode doesn't allow us to send
+        * to an arbitrary mask, so I do unicasts to each CPU instead. This
+        * should be modified to do 1 message per cluster ID - mbligh
+        */
+
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask)
+               __default_send_IPI_dest_field(
+                       apic->cpu_to_logical_apicid(query_cpu), vector,
+                       apic->dest_logical);
+       local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+                                                int vector)
+{
+       unsigned long flags;
+       unsigned int query_cpu;
+       unsigned int this_cpu = smp_processor_id();
+
+       /* See Hack comment above */
+
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               if (query_cpu == this_cpu)
+                       continue;
+               __default_send_IPI_dest_field(
+                       apic->cpu_to_logical_apicid(query_cpu), vector,
+                       apic->dest_logical);
+       }
+       local_irq_restore(flags);
+}
+
+#ifdef CONFIG_X86_32
+
+/*
+ * This is only used on smaller machines.
+ */
+void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
+{
+       unsigned long mask = cpumask_bits(cpumask)[0];
+       unsigned long flags;
+
+       local_irq_save(flags);
+       WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
+       __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+       local_irq_restore(flags);
+}
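+
+/*
+ * In flat logical mode the destination field is a plain 8-bit bitmask of
+ * CPUs, which is why only the first word of the cpumask is used above and
+ * why this path is limited to machines with at most 8 CPUs.
+ */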
+
+void default_send_IPI_allbutself(int vector)
+{
+       /*
+        * If there are no other CPUs in the system then we get an APIC send
+        * error if we try to broadcast, so avoid sending IPIs in this case.
+        */
+       if (num_online_cpus() <= 1)
+               return;
+
+       __default_local_send_IPI_allbutself(vector);
+}
+
+void default_send_IPI_all(int vector)
+{
+       __default_local_send_IPI_all(vector);
+}
+
+void default_send_IPI_self(int vector)
+{
+       __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
+}
+
+/* must come after the send_IPI functions above for inlining */
+static int convert_apicid_to_cpu(int apic_id)
+{
+       int i;
+
+       for_each_possible_cpu(i) {
+               if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
+                       return i;
+       }
+       return -1;
+}
+
+int safe_smp_processor_id(void)
+{
+       int apicid, cpuid;
+
+       if (!boot_cpu_has(X86_FEATURE_APIC))
+               return 0;
+
+       apicid = hard_smp_processor_id();
+       if (apicid == BAD_APICID)
+               return 0;
+
+       cpuid = convert_apicid_to_cpu(apicid);
+
+       return cpuid >= 0 ? cpuid : 0;
+}
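+
+/*
+ * safe_smp_processor_id() deliberately falls back to CPU 0 on any
+ * failure: it is typically called from crash and NMI paths, where a
+ * wrong-but-valid answer beats a recursive fault.
+ */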
+#endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
new file mode 100644 (file)
index 0000000..bdfad80
--- /dev/null
@@ -0,0 +1,564 @@
+/*
+ *  NMI watchdog support on APIC systems
+ *
+ *  Started by Ingo Molnar <mingo@redhat.com>
+ *
+ *  Fixes:
+ *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
+ *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
+ *  Mikael Pettersson  : Pentium 4 support for local APIC NMI watchdog.
+ *  Pavel Machek and
+ *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
+ */
+
+#include <asm/apic.h>
+
+#include <linux/nmi.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/sysctl.h>
+#include <linux/percpu.h>
+#include <linux/kprobes.h>
+#include <linux/cpumask.h>
+#include <linux/kernel_stat.h>
+#include <linux/kdebug.h>
+#include <linux/smp.h>
+
+#include <asm/i8259.h>
+#include <asm/io_apic.h>
+#include <asm/proto.h>
+#include <asm/timer.h>
+
+#include <asm/mce.h>
+
+#include <asm/mach_traps.h>
+
+int unknown_nmi_panic;
+int nmi_watchdog_enabled;
+
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
+/* nmi_active:
+ * >0: the lapic NMI watchdog is active, but can be disabled
+ * <0: the lapic NMI watchdog has not been set up, and cannot
+ *     be enabled
+ *  0: the lapic NMI watchdog is disabled, but can be enabled
+ */
+atomic_t nmi_active = ATOMIC_INIT(0);          /* oprofile uses this */
+EXPORT_SYMBOL(nmi_active);
+
+unsigned int nmi_watchdog = NMI_NONE;
+EXPORT_SYMBOL(nmi_watchdog);
+
+static int panic_on_timeout;
+
+static unsigned int nmi_hz = HZ;
+static DEFINE_PER_CPU(short, wd_enabled);
+static int endflag __initdata;
+
+static inline unsigned int get_nmi_count(int cpu)
+{
+       return per_cpu(irq_stat, cpu).__nmi_count;
+}
+
+static inline int mce_in_progress(void)
+{
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+       return atomic_read(&mce_entry) > 0;
+#endif
+       return 0;
+}
+
+/*
+ * Take the local APIC timer and PIT/HPET into account. We don't
+ * know which one is active when we have highres/dyntick on.
+ */
+static inline unsigned int get_timer_irqs(int cpu)
+{
+       return per_cpu(irq_stat, cpu).apic_timer_irqs +
+               per_cpu(irq_stat, cpu).irq0_irqs;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
+{
+       local_irq_enable_in_hardirq();
+       /*
+        * Intentionally don't use cpu_relax here. This is
+        * to make sure that the performance counter really ticks,
+        * even if there is a simulator or similar that catches the
+        * pause instruction. On a real HT machine this is fine because
+        * all other CPUs are busy with "useless" delay loops and don't
+        * care if they get somewhat less cycles.
+        */
+       while (endflag == 0)
+               mb();
+}
+#endif
+
+static void report_broken_nmi(int cpu, int *prev_nmi_count)
+{
+       printk(KERN_CONT "\n");
+
+       printk(KERN_WARNING
+               "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+                       cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
+
+       printk(KERN_WARNING
+               "Please report this to bugzilla.kernel.org,\n");
+       printk(KERN_WARNING
+               "and attach the output of the 'dmesg' command.\n");
+
+       per_cpu(wd_enabled, cpu) = 0;
+       atomic_dec(&nmi_active);
+}
+
+static void __acpi_nmi_disable(void *__unused)
+{
+       apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
+int __init check_nmi_watchdog(void)
+{
+       unsigned int *prev_nmi_count;
+       int cpu;
+
+       if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
+               return 0;
+
+       prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
+       if (!prev_nmi_count)
+               goto error;
+
+       printk(KERN_INFO "Testing NMI watchdog ... ");
+
+#ifdef CONFIG_SMP
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
+#endif
+
+       for_each_possible_cpu(cpu)
+               prev_nmi_count[cpu] = get_nmi_count(cpu);
+       local_irq_enable();
+       mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
+
+       for_each_online_cpu(cpu) {
+               if (!per_cpu(wd_enabled, cpu))
+                       continue;
+               if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
+                       report_broken_nmi(cpu, prev_nmi_count);
+       }
+       endflag = 1;
+       if (!atomic_read(&nmi_active)) {
+               kfree(prev_nmi_count);
+               atomic_set(&nmi_active, -1);
+               goto error;
+       }
+       printk("OK.\n");
+
+       /*
+        * now that we know it works we can reduce NMI frequency to
+        * something more reasonable; makes a difference in some configs
+        */
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               nmi_hz = lapic_adjust_nmi_hz(1);
+
+       kfree(prev_nmi_count);
+       return 0;
+error:
+       if (nmi_watchdog == NMI_IO_APIC) {
+               if (!timer_through_8259)
+                       disable_8259A_irq(0);
+               on_each_cpu(__acpi_nmi_disable, NULL, 1);
+       }
+
+#ifdef CONFIG_X86_32
+       timer_ack = 0;
+#endif
+       return -1;
+}
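+
+/*
+ * The check above is simple rate sampling: while nmi_cpu_busy() keeps the
+ * performance counters ticking, every enabled CPU must accumulate more
+ * than 5 NMIs over roughly 20 watchdog periods; anything slower is
+ * reported as a stuck watchdog and unregistered.
+ */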
+
+static int __init setup_nmi_watchdog(char *str)
+{
+       unsigned int nmi;
+
+       if (!strncmp(str, "panic", 5)) {
+               panic_on_timeout = 1;
+               str = strchr(str, ',');
+               if (!str)
+                       return 1;
+               ++str;
+       }
+
+       if (!strncmp(str, "lapic", 5))
+               nmi_watchdog = NMI_LOCAL_APIC;
+       else if (!strncmp(str, "ioapic", 6))
+               nmi_watchdog = NMI_IO_APIC;
+       else {
+               get_option(&str, &nmi);
+               if (nmi >= NMI_INVALID)
+                       return 0;
+               nmi_watchdog = nmi;
+       }
+
+       return 1;
+}
+__setup("nmi_watchdog=", setup_nmi_watchdog);
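+
+/*
+ * Boot-line usage example: "nmi_watchdog=panic,lapic" selects the local
+ * APIC watchdog and panics on lockup; a bare number selects the mode by
+ * value, with anything >= NMI_INVALID rejected.
+ */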
+
+/*
+ * Suspend/resume support
+ */
+#ifdef CONFIG_PM
+
+static int nmi_pm_active; /* nmi_active before suspend */
+
+static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
+{
+       /* only CPU0 goes here, other CPUs should be offline */
+       nmi_pm_active = atomic_read(&nmi_active);
+       stop_apic_nmi_watchdog(NULL);
+       BUG_ON(atomic_read(&nmi_active) != 0);
+       return 0;
+}
+
+static int lapic_nmi_resume(struct sys_device *dev)
+{
+       /* only CPU0 goes here, other CPUs should be offline */
+       if (nmi_pm_active > 0) {
+               setup_apic_nmi_watchdog(NULL);
+               touch_nmi_watchdog();
+       }
+       return 0;
+}
+
+static struct sysdev_class nmi_sysclass = {
+       .name           = "lapic_nmi",
+       .resume         = lapic_nmi_resume,
+       .suspend        = lapic_nmi_suspend,
+};
+
+static struct sys_device device_lapic_nmi = {
+       .id     = 0,
+       .cls    = &nmi_sysclass,
+};
+
+static int __init init_lapic_nmi_sysfs(void)
+{
+       int error;
+
+       /*
+        * This should really be a BUG_ON, but because this is an
+        * init call, it just doesn't work.  -dcz
+        */
+       if (nmi_watchdog != NMI_LOCAL_APIC)
+               return 0;
+
+       if (atomic_read(&nmi_active) < 0)
+               return 0;
+
+       error = sysdev_class_register(&nmi_sysclass);
+       if (!error)
+               error = sysdev_register(&device_lapic_nmi);
+       return error;
+}
+
+/* must come after the local APIC's device_initcall() */
+late_initcall(init_lapic_nmi_sysfs);
+
+#endif /* CONFIG_PM */
+
+static void __acpi_nmi_enable(void *__unused)
+{
+       apic_write(APIC_LVT0, APIC_DM_NMI);
+}
+
+/*
+ * Enable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_enable(void)
+{
+       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+               on_each_cpu(__acpi_nmi_enable, NULL, 1);
+}
+
+/*
+ * Disable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_disable(void)
+{
+       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+               on_each_cpu(__acpi_nmi_disable, NULL, 1);
+}
+
+/*
+ * This function is called as soon as the LAPIC NMI watchdog driver has
+ * everything in place and is ready to check whether the NMIs belong to
+ * the NMI watchdog.
+ */
+void cpu_nmi_set_wd_enabled(void)
+{
+       __get_cpu_var(wd_enabled) = 1;
+}
+
+void setup_apic_nmi_watchdog(void *unused)
+{
+       if (__get_cpu_var(wd_enabled))
+               return;
+
+       /*
+        * Cheap hack to support suspend/resume: if CPU0 is not active,
+        * neither should the other CPUs be.
+        */
+       if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
+               return;
+
+       switch (nmi_watchdog) {
+       case NMI_LOCAL_APIC:
+               if (lapic_watchdog_init(nmi_hz) < 0) {
+                       __get_cpu_var(wd_enabled) = 0;
+                       return;
+               }
+               /* FALL THROUGH */
+       case NMI_IO_APIC:
+               __get_cpu_var(wd_enabled) = 1;
+               atomic_inc(&nmi_active);
+       }
+}
+
+void stop_apic_nmi_watchdog(void *unused)
+{
+       /* only support LOCAL and IO APICs for now */
+       if (!nmi_watchdog_active())
+               return;
+       if (__get_cpu_var(wd_enabled) == 0)
+               return;
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               lapic_watchdog_stop();
+       else
+               __acpi_nmi_disable(NULL);
+       __get_cpu_var(wd_enabled) = 0;
+       atomic_dec(&nmi_active);
+}
+
+/*
+ * The best way to detect whether a CPU has a 'hard lockup' problem
+ * is to check its local APIC timer IRQ counts. If they are not
+ * changing then that CPU has some problem.
+ *
+ * As these watchdog NMI IRQs are generated on every CPU, we only
+ * have to check the current processor.
+ *
+ * Since NMIs don't listen to _any_ locks, we have to be extremely
+ * careful not to rely on unsafe variables. The printk might lock
+ * up though, so we have to break up any console locks first ...
+ * [when there will be more tty-related locks, break them up here too!]
+ */
+
+static DEFINE_PER_CPU(unsigned, last_irq_sum);
+static DEFINE_PER_CPU(local_t, alert_counter);
+static DEFINE_PER_CPU(int, nmi_touch);
+
+void touch_nmi_watchdog(void)
+{
+       if (nmi_watchdog_active()) {
+               unsigned cpu;
+
+               /*
+                * Tell other CPUs to reset their alert counters. We cannot
+                * do it ourselves because the alert count increase is not
+                * atomic.
+                */
+               for_each_present_cpu(cpu) {
+                       if (per_cpu(nmi_touch, cpu) != 1)
+                               per_cpu(nmi_touch, cpu) = 1;
+               }
+       }
+
+       /*
+        * Tickle the softlockup detector too:
+        */
+       touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+notrace __kprobes int
+nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+{
+       /*
+        * Since current_thread_info() is always on the stack, and we
+        * always switch the stack NMI-atomically, it's safe to use
+        * smp_processor_id().
+        */
+       unsigned int sum;
+       int touched = 0;
+       int cpu = smp_processor_id();
+       int rc = 0;
+
+       /* check for other users first */
+       if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+                       == NOTIFY_STOP) {
+               rc = 1;
+               touched = 1;
+       }
+
+       sum = get_timer_irqs(cpu);
+
+       if (__get_cpu_var(nmi_touch)) {
+               __get_cpu_var(nmi_touch) = 0;
+               touched = 1;
+       }
+
+       if (cpu_isset(cpu, backtrace_mask)) {
+               static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
+
+               spin_lock(&lock);
+               printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+               dump_stack();
+               spin_unlock(&lock);
+               cpu_clear(cpu, backtrace_mask);
+       }
+
+       /* Could check oops_in_progress here too, but it's safer not to */
+       if (mce_in_progress())
+               touched = 1;
+
+       /* If none of the timers is firing, this CPU isn't doing much */
+       if (!touched && __get_cpu_var(last_irq_sum) == sum) {
+               /*
+                * Ayiee, looks like this CPU is stuck ...
+                * wait a few IRQs (5 seconds) before doing the oops ...
+                */
+               local_inc(&__get_cpu_var(alert_counter));
+               if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+                       /*
+                        * die_nmi will return ONLY if NOTIFY_STOP happens..
+                        */
+                       die_nmi("BUG: NMI Watchdog detected LOCKUP",
+                               regs, panic_on_timeout);
+       } else {
+               __get_cpu_var(last_irq_sum) = sum;
+               local_set(&__get_cpu_var(alert_counter), 0);
+       }
+
+       /* see if the nmi watchdog went off */
+       if (!__get_cpu_var(wd_enabled))
+               return rc;
+       switch (nmi_watchdog) {
+       case NMI_LOCAL_APIC:
+               rc |= lapic_wd_event(nmi_hz);
+               break;
+       case NMI_IO_APIC:
+               /*
+                * We don't know how to accurately check for this, so
+                * just assume it was a watchdog timer interrupt. This
+                * matches the old behaviour.
+                */
+               rc = 1;
+               break;
+       }
+       return rc;
+}
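+
+/*
+ * Lockup detection above is a count-to-threshold scheme: the per-CPU
+ * alert_counter only advances while the timer-IRQ sum stays frozen, and
+ * 5 * nmi_hz consecutive stuck ticks (about 5 seconds) triggers
+ * die_nmi(). Any touch (nmi_touch, an MCE in progress, or another NMI
+ * consumer claiming the event) resets the window.
+ */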
+
+#ifdef CONFIG_SYSCTL
+
+static void enable_ioapic_nmi_watchdog_single(void *unused)
+{
+       __get_cpu_var(wd_enabled) = 1;
+       atomic_inc(&nmi_active);
+       __acpi_nmi_enable(NULL);
+}
+
+static void enable_ioapic_nmi_watchdog(void)
+{
+       on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
+       touch_nmi_watchdog();
+}
+
+static void disable_ioapic_nmi_watchdog(void)
+{
+       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
+}
+
+static int __init setup_unknown_nmi_panic(char *str)
+{
+       unknown_nmi_panic = 1;
+       return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
+{
+       unsigned char reason = get_nmi_reason();
+       char buf[64];
+
+       sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+       die_nmi(buf, regs, 1); /* Always panic here */
+       return 0;
+}
+
+/*
+ * proc handler for /proc/sys/kernel/nmi
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+                       void __user *buffer, size_t *length, loff_t *ppos)
+{
+       int old_state;
+
+       nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+       old_state = nmi_watchdog_enabled;
+       proc_dointvec(table, write, file, buffer, length, ppos);
+       if (!!old_state == !!nmi_watchdog_enabled)
+               return 0;
+
+       if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
+               printk(KERN_WARNING
+                       "NMI watchdog is permanently disabled\n");
+               return -EIO;
+       }
+
+       if (nmi_watchdog == NMI_LOCAL_APIC) {
+               if (nmi_watchdog_enabled)
+                       enable_lapic_nmi_watchdog();
+               else
+                       disable_lapic_nmi_watchdog();
+       } else if (nmi_watchdog == NMI_IO_APIC) {
+               if (nmi_watchdog_enabled)
+                       enable_ioapic_nmi_watchdog();
+               else
+                       disable_ioapic_nmi_watchdog();
+       } else {
+               printk(KERN_WARNING
+                       "NMI watchdog doesn't know what hardware to touch\n");
+               return -EIO;
+       }
+       return 0;
+}
+
+#endif /* CONFIG_SYSCTL */
+
+int do_nmi_callback(struct pt_regs *regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+       if (unknown_nmi_panic)
+               return unknown_nmi_panic_callback(regs, cpu);
+#endif
+       return 0;
+}
+
+void __trigger_all_cpu_backtrace(void)
+{
+       int i;
+
+       backtrace_mask = cpu_online_map;
+       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+       for (i = 0; i < 10 * 1000; i++) {
+               if (cpus_empty(backtrace_mask))
+                       break;
+               mdelay(1);
+       }
+}
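+
+/*
+ * The backtrace trigger works by publishing cpu_online_map in
+ * backtrace_mask and then polling: each CPU notices its bit from
+ * nmi_watchdog_tick(), dumps its stack and clears itself, and the
+ * 10 second bound keeps a wedged CPU from stalling the caller forever.
+ */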
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
new file mode 100644 (file)
index 0000000..4e39d9a
--- /dev/null
@@ -0,0 +1,243 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       if (cpu_has_x2apic)
+               return 1;
+
+       return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static const struct cpumask *x2apic_target_cpus(void)
+{
+       return cpumask_of(0);
+}
+
+/*
+ * For now, each logical CPU is in its own vector allocation domain.
+ */
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+       cpumask_clear(retmask);
+       cpumask_set_cpu(cpu, retmask);
+}
+
+static void
+__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
+{
+       unsigned long cfg;
+
+       cfg = __prepare_ICR(0, vector, dest);
+
+       /* Send the IPI. */
+       native_x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * For now, we send the IPIs one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPIs to the cluster group
+ * at once. We have 16 CPUs in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+       unsigned long query_cpu;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               __x2apic_send_IPI_dest(
+                       per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+                       vector, apic->dest_logical);
+       }
+       local_irq_restore(flags);
+}
+
+static void
+x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+       unsigned long this_cpu = smp_processor_id();
+       unsigned long query_cpu;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               if (query_cpu == this_cpu)
+                       continue;
+               __x2apic_send_IPI_dest(
+                               per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+                               vector, apic->dest_logical);
+       }
+       local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+       unsigned long this_cpu = smp_processor_id();
+       unsigned long query_cpu;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       for_each_online_cpu(query_cpu) {
+               if (query_cpu == this_cpu)
+                       continue;
+               __x2apic_send_IPI_dest(
+                               per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+                               vector, apic->dest_logical);
+       }
+       local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+       x2apic_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+       return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+       /*
+        * We're using fixed IRQ delivery, can only return one logical APIC ID.
+        * May as well be the first.
+        */
+       int cpu = cpumask_first(cpumask);
+
+       if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_logical_apicid, cpu);
+       else
+               return BAD_APICID;
+}
+
+static unsigned int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                             const struct cpumask *andmask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one logical APIC ID.
+        * May as well be the first.
+        */
+       for_each_cpu_and(cpu, cpumask, andmask) {
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       break;
+       }
+
+       if (cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+       return BAD_APICID;
+}
+
+static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
+{
+       unsigned int id;
+
+       id = x;
+       return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       x = id;
+       return x;
+}
+
+static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
+{
+       return current_cpu_data.initial_apicid >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+       apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+}
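+
+/*
+ * In x2APIC mode the LDR is read-only and derived by hardware: per the
+ * SDM, the upper 16 bits carry the cluster ID and the lower 16 bits a
+ * one-hot position within the cluster. Caching it per-CPU here is what
+ * lets the logical IPI paths above do a simple per_cpu() lookup.
+ */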
+
+struct apic apic_x2apic_cluster = {
+
+       .name                           = "cluster x2apic",
+       .probe                          = NULL,
+       .acpi_madt_oem_check            = x2apic_acpi_madt_oem_check,
+       .apic_id_registered             = x2apic_apic_id_registered,
+
+       .irq_delivery_mode              = dest_LowestPrio,
+       .irq_dest_mode                  = 1, /* logical */
+
+       .target_cpus                    = x2apic_target_cpus,
+       .disable_esr                    = 0,
+       .dest_logical                   = APIC_DEST_LOGICAL,
+       .check_apicid_used              = NULL,
+       .check_apicid_present           = NULL,
+
+       .vector_allocation_domain       = x2apic_vector_allocation_domain,
+       .init_apic_ldr                  = init_x2apic_ldr,
+
+       .ioapic_phys_id_map             = NULL,
+       .setup_apic_routing             = NULL,
+       .multi_timer_check              = NULL,
+       .apicid_to_node                 = NULL,
+       .cpu_to_logical_apicid          = NULL,
+       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
+       .apicid_to_cpu_present          = NULL,
+       .setup_portio_remap             = NULL,
+       .check_phys_apicid_present      = default_check_phys_apicid_present,
+       .enable_apic_mode               = NULL,
+       .phys_pkg_id                    = x2apic_cluster_phys_pkg_id,
+       .mps_oem_check                  = NULL,
+
+       .get_apic_id                    = x2apic_cluster_phys_get_apic_id,
+       .set_apic_id                    = set_apic_id,
+       .apic_id_mask                   = 0xFFFFFFFFu,
+
+       .cpu_mask_to_apicid             = x2apic_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and         = x2apic_cpu_mask_to_apicid_and,
+
+       .send_IPI_mask                  = x2apic_send_IPI_mask,
+       .send_IPI_mask_allbutself       = x2apic_send_IPI_mask_allbutself,
+       .send_IPI_allbutself            = x2apic_send_IPI_allbutself,
+       .send_IPI_all                   = x2apic_send_IPI_all,
+       .send_IPI_self                  = x2apic_send_IPI_self,
+
+       .wakeup_cpu                     = NULL,
+       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
+       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+       .wait_for_init_deassert         = NULL,
+       .smp_callin_clear_local_apic    = NULL,
+       .inquire_remote_apic            = NULL,
+
+       .read                           = native_apic_msr_read,
+       .write                          = native_apic_msr_write,
+       .icr_read                       = native_x2apic_icr_read,
+       .icr_write                      = native_x2apic_icr_write,
+       .wait_icr_idle                  = native_x2apic_wait_icr_idle,
+       .safe_wait_icr_idle             = native_safe_x2apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
new file mode 100644 (file)
index 0000000..d2d52eb
--- /dev/null
@@ -0,0 +1,229 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+static int x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+       x2apic_phys = 1;
+       return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       if (cpu_has_x2apic && x2apic_phys)
+               return 1;
+
+       return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static const struct cpumask *x2apic_target_cpus(void)
+{
+       return cpumask_of(0);
+}
+
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+       cpumask_clear(retmask);
+       cpumask_set_cpu(cpu, retmask);
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+                                  unsigned int dest)
+{
+       unsigned long cfg;
+
+       cfg = __prepare_ICR(0, vector, dest);
+
+       /* Send the IPI. */
+       native_x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+       unsigned long query_cpu;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+                                      vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+}
+
+static void
+x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+       unsigned long this_cpu = smp_processor_id();
+       unsigned long query_cpu;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               if (query_cpu != this_cpu)
+                       __x2apic_send_IPI_dest(
+                               per_cpu(x86_cpu_to_apicid, query_cpu),
+                               vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+       unsigned long this_cpu = smp_processor_id();
+       unsigned long query_cpu;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       for_each_online_cpu(query_cpu) {
+               if (query_cpu == this_cpu)
+                       continue;
+               __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+                                      vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+       x2apic_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+       return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       int cpu = cpumask_first(cpumask);
+
+       if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       else
+               return BAD_APICID;
+}
+
+static unsigned int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                             const struct cpumask *andmask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       for_each_cpu_and(cpu, cpumask, andmask) {
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       break;
+       }
+
+       if (cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+
+       return BAD_APICID;
+}
+
+static unsigned int x2apic_phys_get_apic_id(unsigned long x)
+{
+       return x;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       return id;
+}
+
+static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
+{
+       return current_cpu_data.initial_apicid >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+       apic_write(APIC_SELF_IPI, vector);
+}
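+
+/*
+ * APIC_SELF_IPI is the dedicated x2APIC self-IPI register: a single MSR
+ * write of just the vector, cheaper than composing a full ICR value with
+ * the self-destination shorthand.
+ */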
+
+static void init_x2apic_ldr(void)
+{
+}
+
+struct apic apic_x2apic_phys = {
+
+       .name                           = "physical x2apic",
+       .probe                          = NULL,
+       .acpi_madt_oem_check            = x2apic_acpi_madt_oem_check,
+       .apic_id_registered             = x2apic_apic_id_registered,
+
+       .irq_delivery_mode              = dest_Fixed,
+       .irq_dest_mode                  = 0, /* physical */
+
+       .target_cpus                    = x2apic_target_cpus,
+       .disable_esr                    = 0,
+       .dest_logical                   = 0,
+       .check_apicid_used              = NULL,
+       .check_apicid_present           = NULL,
+
+       .vector_allocation_domain       = x2apic_vector_allocation_domain,
+       .init_apic_ldr                  = init_x2apic_ldr,
+
+       .ioapic_phys_id_map             = NULL,
+       .setup_apic_routing             = NULL,
+       .multi_timer_check              = NULL,
+       .apicid_to_node                 = NULL,
+       .cpu_to_logical_apicid          = NULL,
+       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
+       .apicid_to_cpu_present          = NULL,
+       .setup_portio_remap             = NULL,
+       .check_phys_apicid_present      = default_check_phys_apicid_present,
+       .enable_apic_mode               = NULL,
+       .phys_pkg_id                    = x2apic_phys_pkg_id,
+       .mps_oem_check                  = NULL,
+
+       .get_apic_id                    = x2apic_phys_get_apic_id,
+       .set_apic_id                    = set_apic_id,
+       .apic_id_mask                   = 0xFFFFFFFFu,
+
+       .cpu_mask_to_apicid             = x2apic_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and         = x2apic_cpu_mask_to_apicid_and,
+
+       .send_IPI_mask                  = x2apic_send_IPI_mask,
+       .send_IPI_mask_allbutself       = x2apic_send_IPI_mask_allbutself,
+       .send_IPI_allbutself            = x2apic_send_IPI_allbutself,
+       .send_IPI_all                   = x2apic_send_IPI_all,
+       .send_IPI_self                  = x2apic_send_IPI_self,
+
+       .wakeup_cpu                     = NULL,
+       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
+       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+       .wait_for_init_deassert         = NULL,
+       .smp_callin_clear_local_apic    = NULL,
+       .inquire_remote_apic            = NULL,
+
+       .read                           = native_apic_msr_read,
+       .write                          = native_apic_msr_write,
+       .icr_read                       = native_x2apic_icr_read,
+       .icr_write                      = native_x2apic_icr_write,
+       .wait_icr_idle                  = native_x2apic_wait_icr_idle,
+       .safe_wait_icr_idle             = native_safe_x2apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
new file mode 100644 (file)
index 0000000..20b4ad0
--- /dev/null
@@ -0,0 +1,643 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV APIC functions (note: not an Intel compatible APIC)
+ *
+ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/timer.h>
+#include <linux/proc_fs.h>
+#include <asm/current.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/pgtable.h>
+#include <asm/uv/uv.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+
+DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+static enum uv_system_type uv_system_type;
+
+static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       if (!strcmp(oem_id, "SGI")) {
+               if (!strcmp(oem_table_id, "UVL"))
+                       uv_system_type = UV_LEGACY_APIC;
+               else if (!strcmp(oem_table_id, "UVX"))
+                       uv_system_type = UV_X2APIC;
+               else if (!strcmp(oem_table_id, "UVH")) {
+                       uv_system_type = UV_NON_UNIQUE_APIC;
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+       return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+       return uv_system_type != UV_NONE;
+}
+EXPORT_SYMBOL_GPL(is_uv_system);
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+
+struct uv_blade_info *uv_blade_info;
+EXPORT_SYMBOL_GPL(uv_blade_info);
+
+short *uv_node_to_blade;
+EXPORT_SYMBOL_GPL(uv_node_to_blade);
+
+short *uv_cpu_to_blade;
+EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+
+short uv_possible_blades;
+EXPORT_SYMBOL_GPL(uv_possible_blades);
+
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static const struct cpumask *uv_target_cpus(void)
+{
+       return cpumask_of(0);
+}
+
+static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+       cpumask_clear(retmask);
+       cpumask_set_cpu(cpu, retmask);
+}
+
+int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{
+       unsigned long val;
+       int pnode;
+
+       pnode = uv_apicid_to_pnode(phys_apicid);
+       val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+           (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+           (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+           APIC_DM_INIT;
+       uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+       mdelay(10);
+
+       val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+           (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+           (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+           APIC_DM_STARTUP;
+       uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+       return 0;
+}
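+
+/*
+ * The two MMR writes above are UV's stand-in for the classic local APIC
+ * INIT + STARTUP sequence: an INIT to reset the target, a 10ms settle
+ * delay, then a STARTUP whose vector field encodes start_rip >> 12 (the
+ * 4K-aligned page the trampoline starts at).
+ */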
+
+static void uv_send_IPI_one(int cpu, int vector)
+{
+       unsigned long val, apicid;
+       int pnode;
+
+       apicid = per_cpu(x86_cpu_to_apicid, cpu);
+       pnode = uv_apicid_to_pnode(apicid);
+
+       val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+             (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+             (vector << UVH_IPI_INT_VECTOR_SHFT);
+
+       uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+}
+
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+       unsigned int cpu;
+
+       for_each_cpu(cpu, mask)
+               uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+       unsigned int this_cpu = smp_processor_id();
+       unsigned int cpu;
+
+       for_each_cpu(cpu, mask) {
+               if (cpu != this_cpu)
+                       uv_send_IPI_one(cpu, vector);
+       }
+}
+
+static void uv_send_IPI_allbutself(int vector)
+{
+       unsigned int this_cpu = smp_processor_id();
+       unsigned int cpu;
+
+       for_each_online_cpu(cpu) {
+               if (cpu != this_cpu)
+                       uv_send_IPI_one(cpu, vector);
+       }
+}
+
+static void uv_send_IPI_all(int vector)
+{
+       uv_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int uv_apic_id_registered(void)
+{
+       return 1;
+}
+
+static void uv_init_apic_ldr(void)
+{
+}
+
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       int cpu = cpumask_first(cpumask);
+
+       if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       else
+               return BAD_APICID;
+}
+
+static unsigned int
+uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                         const struct cpumask *andmask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       for_each_cpu_and(cpu, cpumask, andmask) {
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       break;
+       }
+       if (cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+
+       return BAD_APICID;
+}
+
+static unsigned int x2apic_get_apic_id(unsigned long x)
+{
+       unsigned int id;
+
+       WARN_ON(preemptible() && num_online_cpus() > 1);
+       id = x | __get_cpu_var(x2apic_extra_bits);
+
+       return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       /* mask out x2apic_extra_bits? */
+       x = id;
+       return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+       return x2apic_get_apic_id(apic_read(APIC_ID));
+}
+
+static int uv_phys_pkg_id(int initial_apicid, int index_msb)
+{
+       return uv_read_apic_id() >> index_msb;
+}
+
+static void uv_send_IPI_self(int vector)
+{
+       apic_write(APIC_SELF_IPI, vector);
+}
+
+struct apic apic_x2apic_uv_x = {
+
+       .name                           = "UV large system",
+       .probe                          = NULL,
+       .acpi_madt_oem_check            = uv_acpi_madt_oem_check,
+       .apic_id_registered             = uv_apic_id_registered,
+
+       .irq_delivery_mode              = dest_Fixed,
+       .irq_dest_mode                  = 1, /* logical */
+
+       .target_cpus                    = uv_target_cpus,
+       .disable_esr                    = 0,
+       .dest_logical                   = APIC_DEST_LOGICAL,
+       .check_apicid_used              = NULL,
+       .check_apicid_present           = NULL,
+
+       .vector_allocation_domain       = uv_vector_allocation_domain,
+       .init_apic_ldr                  = uv_init_apic_ldr,
+
+       .ioapic_phys_id_map             = NULL,
+       .setup_apic_routing             = NULL,
+       .multi_timer_check              = NULL,
+       .apicid_to_node                 = NULL,
+       .cpu_to_logical_apicid          = NULL,
+       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
+       .apicid_to_cpu_present          = NULL,
+       .setup_portio_remap             = NULL,
+       .check_phys_apicid_present      = default_check_phys_apicid_present,
+       .enable_apic_mode               = NULL,
+       .phys_pkg_id                    = uv_phys_pkg_id,
+       .mps_oem_check                  = NULL,
+
+       .get_apic_id                    = x2apic_get_apic_id,
+       .set_apic_id                    = set_apic_id,
+       .apic_id_mask                   = 0xFFFFFFFFu,
+
+       .cpu_mask_to_apicid             = uv_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and         = uv_cpu_mask_to_apicid_and,
+
+       .send_IPI_mask                  = uv_send_IPI_mask,
+       .send_IPI_mask_allbutself       = uv_send_IPI_mask_allbutself,
+       .send_IPI_allbutself            = uv_send_IPI_allbutself,
+       .send_IPI_all                   = uv_send_IPI_all,
+       .send_IPI_self                  = uv_send_IPI_self,
+
+       .wakeup_cpu                     = NULL,
+       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
+       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+       .wait_for_init_deassert         = NULL,
+       .smp_callin_clear_local_apic    = NULL,
+       .inquire_remote_apic            = NULL,
+
+       .read                           = native_apic_msr_read,
+       .write                          = native_apic_msr_write,
+       .icr_read                       = native_x2apic_icr_read,
+       .icr_write                      = native_x2apic_icr_write,
+       .wait_icr_idle                  = native_x2apic_wait_icr_idle,
+       .safe_wait_icr_idle             = native_safe_x2apic_wait_icr_idle,
+};
+
+static __cpuinit void set_x2apic_extra_bits(int pnode)
+{
+       __get_cpu_var(x2apic_extra_bits) = (pnode << 6);
+}
+
+/*
+ * Called on boot cpu.
+ */
+static __init int boot_pnode_to_blade(int pnode)
+{
+       int blade;
+
+       for (blade = 0; blade < uv_num_possible_blades(); blade++)
+               if (pnode == uv_blade_info[blade].pnode)
+                       return blade;
+       BUG();
+}
+
+struct redir_addr {
+       unsigned long redirect;
+       unsigned long alias;
+};
+
+#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+
+static __initdata struct redir_addr redir_addrs[] = {
+       {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
+       {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
+       {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
+};
+
+static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
+{
+       union uvh_si_alias0_overlay_config_u alias;
+       union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
+               alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+               if (alias.s.base == 0) {
+                       *size = (1UL << alias.s.m_alias);
+                       redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
+                       *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+                       return;
+               }
+       }
+       BUG();
+}
+
+static __init void map_low_mmrs(void)
+{
+       init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+       init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
+enum map_type {map_wb, map_uc};
+
+static __init void map_high(char *id, unsigned long base, int shift,
+                           int max_pnode, enum map_type map_type)
+{
+       unsigned long bytes, paddr;
+
+       paddr = base << shift;
+       bytes = (1UL << shift) * (max_pnode + 1);
+       printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
+                                               paddr + bytes);
+       if (map_type == map_uc)
+               init_extra_mapping_uc(paddr, bytes);
+       else
+               init_extra_mapping_wb(paddr, bytes);
+}
+
+static __init void map_gru_high(int max_pnode)
+{
+       union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+       int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+       gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
+       if (gru.s.enable)
+               map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
+}
+
+static __init void map_config_high(int max_pnode)
+{
+       union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
+       int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+       cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
+       if (cfg.s.enable)
+               map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
+}
+
+static __init void map_mmr_high(int max_pnode)
+{
+       union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+       int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+       mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+       if (mmr.s.enable)
+               map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
+}
+
+static __init void map_mmioh_high(int max_pnode)
+{
+       union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+       int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+       mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+       if (mmioh.s.enable)
+               map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
+}
+
+static __init void uv_rtc_init(void)
+{
+       long status;
+       u64 ticks_per_sec;
+
+       status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+                                       &ticks_per_sec);
+       if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
+               printk(KERN_WARNING
+                       "unable to determine platform RTC clock frequency, "
+                       "guessing.\n");
+               /* BIOS gives a wrong value for the clock frequency, so guess */
+               sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+       } else {
+               sn_rtc_cycles_per_second = ticks_per_sec;
+       }
+}
+
+/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+       struct timer_list *timer = &uv_hub_info->scir.timer;
+       unsigned char bits = uv_hub_info->scir.state;
+
+       /* flip heartbeat bit */
+       bits ^= SCIR_CPU_HEARTBEAT;
+
+       /* is this cpu idle? */
+       if (idle_cpu(raw_smp_processor_id()))
+               bits &= ~SCIR_CPU_ACTIVITY;
+       else
+               bits |= SCIR_CPU_ACTIVITY;
+
+       /* update system controller interface reg */
+       uv_set_scir_bits(bits);
+
+       /* enable next timer period */
+       mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
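+
+/*
+ * The SCIR ("system controller interface reg" above) is a per-CPU byte
+ * in the hub visible to the system controller; flipping
+ * SCIR_CPU_HEARTBEAT once per timer period gives the controller a
+ * liveness signal, while the ACTIVITY bit lets it tell busy CPUs from
+ * idle ones.
+ */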
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+       if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+               struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+               uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+               setup_timer(timer, uv_heartbeat, cpu);
+               timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+               add_timer_on(timer, cpu);
+               uv_cpu_hub_info(cpu)->scir.enabled = 1;
+       }
+
+       /* check boot cpu */
+       if (!uv_cpu_hub_info(0)->scir.enabled)
+               uv_heartbeat_enable(0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+       if (uv_cpu_hub_info(cpu)->scir.enabled) {
+               uv_cpu_hub_info(cpu)->scir.enabled = 0;
+               del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+       }
+       uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+                                      unsigned long action, void *hcpu)
+{
+       long cpu = (long)hcpu;
+
+       switch (action) {
+       case CPU_ONLINE:
+               uv_heartbeat_enable(cpu);
+               break;
+       case CPU_DOWN_PREPARE:
+               uv_heartbeat_disable(cpu);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+       hotcpu_notifier(uv_scir_cpu_notify, 0);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+}
+
+static __init int uv_init_heartbeat(void)
+{
+       int cpu;
+
+       if (is_uv_system())
+               for_each_online_cpu(cpu)
+                       uv_heartbeat_enable(cpu);
+       return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ *     ZZZ hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+       /* CPU 0 initialization will be done via uv_system_init. */
+       if (!uv_blade_info)
+               return;
+
+       uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+       if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+               set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
+
+void __init uv_system_init(void)
+{
+       union uvh_si_addr_map_config_u m_n_config;
+       union uvh_node_id_u node_id;
+       unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
+       int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+       int max_pnode = 0;
+       unsigned long mmr_base, present;
+
+       map_low_mmrs();
+
+       m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+       m_val = m_n_config.s.m_skt;
+       n_val = m_n_config.s.n_skt;
+       mmr_base =
+           uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+           ~UV_MMR_ENABLE;
+       printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+       for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
+               uv_possible_blades +=
+                 hweight64(uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8));
+       printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+       bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
+       uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+
+       get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
+
+       bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
+       uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+       memset(uv_node_to_blade, 255, bytes);
+
+       bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
+       uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+       memset(uv_cpu_to_blade, 255, bytes);
+
+       blade = 0;
+       for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+               present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+               for (j = 0; j < 64; j++) {
+                       if (!test_bit(j, &present))
+                               continue;
+                       uv_blade_info[blade].pnode = (i * 64 + j);
+                       uv_blade_info[blade].nr_possible_cpus = 0;
+                       uv_blade_info[blade].nr_online_cpus = 0;
+                       blade++;
+               }
+       }
+
+       node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+       gnode_upper = (((unsigned long)node_id.s.node_id) &
+                      ~((1 << n_val) - 1)) << m_val;
+
+       uv_bios_init();
+       uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
+                           &sn_coherency_id, &sn_region_size);
+       uv_rtc_init();
+
+       for_each_present_cpu(cpu) {
+               nid = cpu_to_node(cpu);
+               pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
+               blade = boot_pnode_to_blade(pnode);
+               lcpu = uv_blade_info[blade].nr_possible_cpus;
+               uv_blade_info[blade].nr_possible_cpus++;
+
+               uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
+               uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
+               uv_cpu_hub_info(cpu)->m_val = m_val;
+               uv_cpu_hub_info(cpu)->n_val = n_val;
+               uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+               uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
+               uv_cpu_hub_info(cpu)->pnode = pnode;
+               uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
+               uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
+               uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+               uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+               uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+               uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
+               uv_node_to_blade[nid] = blade;
+               uv_cpu_to_blade[cpu] = blade;
+               max_pnode = max(pnode, max_pnode);
+
+               printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
+                       "lcpu %d, blade %d\n",
+                       cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
+                       lcpu, blade);
+       }
+
+       map_gru_high(max_pnode);
+       map_mmr_high(max_pnode);
+       map_config_high(max_pnode);
+       map_mmioh_high(max_pnode);
+
+       uv_cpu_init();
+       uv_scir_register_cpu_notifier();
+       proc_mkdir("sgi_uv", NULL);
+}
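Everything stored per cpu above is mask/shift arithmetic on the m_val/n_val geometry read from UVH_SI_ADDR_MAP_CONFIG. A worked example with made-up values showing how pnode_mask, gpa_mask and gnode_upper fall out:

    #include <stdio.h>

    int main(void)
    {
            /* Illustrative geometry only -- the real values come from
             * the UVH_SI_ADDR_MAP_CONFIG MMR at boot. */
            int m_val = 12, n_val = 4;
            unsigned long node_id = 37;             /* 0b100101 */

            unsigned long pnode_mask  = (1UL << n_val) - 1;
            unsigned long gpa_mask    = (1UL << (m_val + n_val)) - 1;
            unsigned long gnode_upper = (node_id & ~pnode_mask) << m_val;

            printf("pnode_mask 0x%lx, gpa_mask 0x%lx, gnode_upper 0x%lx\n",
                   pnode_mask, gpa_mask, gnode_upper);
            return 0;
    }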
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
deleted file mode 100644 (file)
index 70935dd..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Generic APIC sub-arch probe layer.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <linux/dmar.h>
-
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/setup.h>
-
-extern struct apic apic_flat;
-extern struct apic apic_physflat;
-extern struct apic apic_x2apic_uv_x;
-extern struct apic apic_x2apic_phys;
-extern struct apic apic_x2apic_cluster;
-
-struct apic __read_mostly *apic = &apic_flat;
-EXPORT_SYMBOL_GPL(apic);
-
-static struct apic *apic_probe[] __initdata = {
-#ifdef CONFIG_X86_UV
-       &apic_x2apic_uv_x,
-#endif
-#ifdef CONFIG_X86_X2APIC
-       &apic_x2apic_phys,
-       &apic_x2apic_cluster,
-#endif
-       &apic_physflat,
-       NULL,
-};
-
-/*
- * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
- */
-void __init default_setup_apic_routing(void)
-{
-#ifdef CONFIG_X86_X2APIC
-       if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) {
-               if (!intr_remapping_enabled)
-                       apic = &apic_flat;
-       }
-#endif
-
-       if (apic == &apic_flat) {
-               if (max_physical_apicid >= 8)
-                       apic = &apic_physflat;
-               printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
-       }
-
-       if (x86_quirks->update_apic)
-               x86_quirks->update_apic();
-}
-
-/* Same for both flat and physical. */
-
-void apic_send_IPI_self(int vector)
-{
-       __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
-}
-
-int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-       int i;
-
-       for (i = 0; apic_probe[i]; ++i) {
-               if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
-                       apic = apic_probe[i];
-                       printk(KERN_INFO "Setting APIC routing to %s.\n",
-                               apic->name);
-                       return 1;
-               }
-       }
-       return 0;
-}
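The file above is essentially one pattern: a NULL-terminated table of candidate drivers, where the first entry whose acpi_madt_oem_check() accepts the firmware's OEM ids wins. A minimal userspace model of that pattern; the driver names and accept functions are illustrative, not the kernel's:

    #include <stdio.h>
    #include <string.h>

    struct driver {
            const char *name;
            int (*check)(const char *oem_id, const char *oem_table_id);
    };

    static int accept_sgi(const char *oem_id, const char *oem_table_id)
    {
            return strcmp(oem_id, "SGI") == 0;
    }

    static int accept_any(const char *oem_id, const char *oem_table_id)
    {
            return 1;
    }

    static struct driver drivers[] = {
            { "uv",       accept_sgi },
            { "physflat", accept_any },
            { NULL,       NULL },
    };

    int main(void)
    {
            const char *oem = "SGI", *table = "UVH";

            /* First match wins, exactly like the apic_probe[] loop. */
            for (int i = 0; drivers[i].name; i++) {
                    if (drivers[i].check(oem, table)) {
                            printf("Setting APIC routing to %s\n",
                                   drivers[i].name);
                            break;
                    }
            }
            return 0;
    }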
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
deleted file mode 100644 (file)
index 3b00299..0000000
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Flat APIC subarch code.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/errno.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-       return 1;
-}
-
-static const struct cpumask *flat_target_cpus(void)
-{
-       return cpu_online_mask;
-}
-
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-       /* Careful. Some cpus do not strictly honor the set of cpus
-        * specified in the interrupt destination when using lowest
-        * priority interrupt delivery mode.
-        *
-        * In particular there was a hyperthreading cpu observed to
-        * deliver interrupts to the wrong hyperthread when only one
-        * hyperthread was specified in the interrupt destination.
-        */
-       cpumask_clear(retmask);
-       cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116).  So here it goes...
- */
-static void flat_init_apic_ldr(void)
-{
-       unsigned long val;
-       unsigned long num, id;
-
-       num = smp_processor_id();
-       id = 1UL << num;
-       apic_write(APIC_DFR, APIC_DFR_FLAT);
-       val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
-       val |= SET_APIC_LOGICAL_ID(id);
-       apic_write(APIC_LDR, val);
-}
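flat_init_apic_ldr() gives each CPU exactly one bit of the 8-bit logical ID kept in LDR bits 24..31, which is also why flat logical mode stops scaling past 8 CPUs. A sketch that models SET_APIC_LOGICAL_ID as (id & 0xFF) << 24, an assumption consistent with the apic_id_mask used in this file:

    #include <stdio.h>

    int main(void)
    {
            for (unsigned int cpu = 0; cpu < 8; cpu++) {
                    unsigned long id  = 1UL << cpu;           /* one bit per CPU */
                    unsigned long ldr = (id & 0xFFUL) << 24;  /* LDR bits 24..31 */

                    printf("cpu %u -> LDR 0x%08lx\n", cpu, ldr);
            }
            return 0;
    }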
-
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
-       local_irq_restore(flags);
-}
-
-static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
-       unsigned long mask = cpumask_bits(cpumask)[0];
-
-       _flat_send_IPI_mask(mask, vector);
-}
-
-static void
-flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
-{
-       unsigned long mask = cpumask_bits(cpumask)[0];
-       int cpu = smp_processor_id();
-
-       if (cpu < BITS_PER_LONG)
-               clear_bit(cpu, &mask);
-
-       _flat_send_IPI_mask(mask, vector);
-}
-
-static void flat_send_IPI_allbutself(int vector)
-{
-       int cpu = smp_processor_id();
-#ifdef CONFIG_HOTPLUG_CPU
-       int hotplug = 1;
-#else
-       int hotplug = 0;
-#endif
-       if (hotplug || vector == NMI_VECTOR) {
-               if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
-                       unsigned long mask = cpumask_bits(cpu_online_mask)[0];
-
-                       if (cpu < BITS_PER_LONG)
-                               clear_bit(cpu, &mask);
-
-                       _flat_send_IPI_mask(mask, vector);
-               }
-       } else if (num_online_cpus() > 1) {
-               __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
-                                           vector, apic->dest_logical);
-       }
-}
-
-static void flat_send_IPI_all(int vector)
-{
-       if (vector == NMI_VECTOR) {
-               flat_send_IPI_mask(cpu_online_mask, vector);
-       } else {
-               __default_send_IPI_shortcut(APIC_DEST_ALLINC,
-                                           vector, apic->dest_logical);
-       }
-}
-
-static unsigned int flat_get_apic_id(unsigned long x)
-{
-       unsigned int id;
-
-       id = (((x)>>24) & 0xFFu);
-
-       return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
-       unsigned long x;
-
-       x = ((id & 0xFFu)<<24);
-       return x;
-}
-
-static unsigned int read_xapic_id(void)
-{
-       unsigned int id;
-
-       id = flat_get_apic_id(apic_read(APIC_ID));
-       return id;
-}
-
-static int flat_apic_id_registered(void)
-{
-       return physid_isset(read_xapic_id(), phys_cpu_present_map);
-}
-
-static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-       return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
-}
-
-static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                                               const struct cpumask *andmask)
-{
-       unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
-       unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
-
-       return mask1 & mask2;
-}
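Since a flat-mode destination is just an 8-bit CPU bitmask, combining two cpumasks reduces to a bitwise AND, as the function above shows. A trivial illustration:

    #include <stdio.h>

    #define APIC_ALL_CPUS 0xFFu

    int main(void)
    {
            unsigned long cpumask = 0x0F;   /* cpus 0-3 */
            unsigned long andmask = 0x06;   /* cpus 1-2 */

            printf("apicid mask: 0x%02lx\n",
                   (cpumask & APIC_ALL_CPUS) & (andmask & APIC_ALL_CPUS));
            return 0;
    }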
-
-static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
-{
-       return hard_smp_processor_id() >> index_msb;
-}
-
-struct apic apic_flat =  {
-       .name                           = "flat",
-       .probe                          = NULL,
-       .acpi_madt_oem_check            = flat_acpi_madt_oem_check,
-       .apic_id_registered             = flat_apic_id_registered,
-
-       .irq_delivery_mode              = dest_LowestPrio,
-       .irq_dest_mode                  = 1, /* logical */
-
-       .target_cpus                    = flat_target_cpus,
-       .disable_esr                    = 0,
-       .dest_logical                   = APIC_DEST_LOGICAL,
-       .check_apicid_used              = NULL,
-       .check_apicid_present           = NULL,
-
-       .vector_allocation_domain       = flat_vector_allocation_domain,
-       .init_apic_ldr                  = flat_init_apic_ldr,
-
-       .ioapic_phys_id_map             = NULL,
-       .setup_apic_routing             = NULL,
-       .multi_timer_check              = NULL,
-       .apicid_to_node                 = NULL,
-       .cpu_to_logical_apicid          = NULL,
-       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
-       .apicid_to_cpu_present          = NULL,
-       .setup_portio_remap             = NULL,
-       .check_phys_apicid_present      = default_check_phys_apicid_present,
-       .enable_apic_mode               = NULL,
-       .phys_pkg_id                    = flat_phys_pkg_id,
-       .mps_oem_check                  = NULL,
-
-       .get_apic_id                    = flat_get_apic_id,
-       .set_apic_id                    = set_apic_id,
-       .apic_id_mask                   = 0xFFu << 24,
-
-       .cpu_mask_to_apicid             = flat_cpu_mask_to_apicid,
-       .cpu_mask_to_apicid_and         = flat_cpu_mask_to_apicid_and,
-
-       .send_IPI_mask                  = flat_send_IPI_mask,
-       .send_IPI_mask_allbutself       = flat_send_IPI_mask_allbutself,
-       .send_IPI_allbutself            = flat_send_IPI_allbutself,
-       .send_IPI_all                   = flat_send_IPI_all,
-       .send_IPI_self                  = apic_send_IPI_self,
-
-       .wakeup_cpu                     = NULL,
-       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
-       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-       .wait_for_init_deassert         = NULL,
-       .smp_callin_clear_local_apic    = NULL,
-       .inquire_remote_apic            = NULL,
-
-       .read                           = native_apic_mem_read,
-       .write                          = native_apic_mem_write,
-       .icr_read                       = native_apic_icr_read,
-       .icr_write                      = native_apic_icr_write,
-       .wait_icr_idle                  = native_apic_wait_icr_idle,
-       .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,
-};
-
-/*
- * Physflat mode is used when there are more than 8 CPUs on an AMD system.
- * We cannot use logical delivery in this case because the mask
- * overflows, so use physical mode.
- */
-static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-#ifdef CONFIG_ACPI
-       /*
-        * Quirk: some x86_64 machines can only use physical APIC mode
-        * regardless of how many processors are present (x86_64 ES7000
-        * is an example).
-        */
-       if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-               (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
-               printk(KERN_DEBUG "system APIC can only use physical flat\n");
-               return 1;
-       }
-#endif
-
-       return 0;
-}
-
-static const struct cpumask *physflat_target_cpus(void)
-{
-       return cpu_online_mask;
-}
-
-static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-       cpumask_clear(retmask);
-       cpumask_set_cpu(cpu, retmask);
-}
-
-static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
-       default_send_IPI_mask_sequence_phys(cpumask, vector);
-}
-
-static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
-                                             int vector)
-{
-       default_send_IPI_mask_allbutself_phys(cpumask, vector);
-}
-
-static void physflat_send_IPI_allbutself(int vector)
-{
-       default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
-}
-
-static void physflat_send_IPI_all(int vector)
-{
-       physflat_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-       int cpu;
-
-       /*
-        * We're using fixed IRQ delivery, can only return one phys APIC ID.
-        * May as well be the first.
-        */
-       cpu = cpumask_first(cpumask);
-       if ((unsigned)cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_apicid, cpu);
-       else
-               return BAD_APICID;
-}
-
-static unsigned int
-physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                               const struct cpumask *andmask)
-{
-       int cpu;
-
-       /*
-        * We're using fixed IRQ delivery, can only return one phys APIC ID.
-        * May as well be the first.
-        */
-       for_each_cpu_and(cpu, cpumask, andmask) {
-               if (cpumask_test_cpu(cpu, cpu_online_mask))
-                       break;
-       }
-       if (cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_apicid, cpu);
-
-       return BAD_APICID;
-}
-
-struct apic apic_physflat =  {
-
-       .name                           = "physical flat",
-       .probe                          = NULL,
-       .acpi_madt_oem_check            = physflat_acpi_madt_oem_check,
-       .apic_id_registered             = flat_apic_id_registered,
-
-       .irq_delivery_mode              = dest_Fixed,
-       .irq_dest_mode                  = 0, /* physical */
-
-       .target_cpus                    = physflat_target_cpus,
-       .disable_esr                    = 0,
-       .dest_logical                   = 0,
-       .check_apicid_used              = NULL,
-       .check_apicid_present           = NULL,
-
-       .vector_allocation_domain       = physflat_vector_allocation_domain,
-       /* not needed, but shouldn't hurt: */
-       .init_apic_ldr                  = flat_init_apic_ldr,
-
-       .ioapic_phys_id_map             = NULL,
-       .setup_apic_routing             = NULL,
-       .multi_timer_check              = NULL,
-       .apicid_to_node                 = NULL,
-       .cpu_to_logical_apicid          = NULL,
-       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
-       .apicid_to_cpu_present          = NULL,
-       .setup_portio_remap             = NULL,
-       .check_phys_apicid_present      = default_check_phys_apicid_present,
-       .enable_apic_mode               = NULL,
-       .phys_pkg_id                    = flat_phys_pkg_id,
-       .mps_oem_check                  = NULL,
-
-       .get_apic_id                    = flat_get_apic_id,
-       .set_apic_id                    = set_apic_id,
-       .apic_id_mask                   = 0xFFu << 24,
-
-       .cpu_mask_to_apicid             = physflat_cpu_mask_to_apicid,
-       .cpu_mask_to_apicid_and         = physflat_cpu_mask_to_apicid_and,
-
-       .send_IPI_mask                  = physflat_send_IPI_mask,
-       .send_IPI_mask_allbutself       = physflat_send_IPI_mask_allbutself,
-       .send_IPI_allbutself            = physflat_send_IPI_allbutself,
-       .send_IPI_all                   = physflat_send_IPI_all,
-       .send_IPI_self                  = apic_send_IPI_self,
-
-       .wakeup_cpu                     = NULL,
-       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
-       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-       .wait_for_init_deassert         = NULL,
-       .smp_callin_clear_local_apic    = NULL,
-       .inquire_remote_apic            = NULL,
-
-       .read                           = native_apic_mem_read,
-       .write                          = native_apic_mem_write,
-       .icr_read                       = native_apic_icr_read,
-       .icr_write                      = native_apic_icr_write,
-       .wait_icr_idle                  = native_apic_wait_icr_idle,
-       .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,
-};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
deleted file mode 100644 (file)
index 4e39d9a..0000000
+++ /dev/null
@@ -1,243 +0,0 @@
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/dmar.h>
-
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-
-DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
-
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-       if (cpu_has_x2apic)
-               return 1;
-
-       return 0;
-}
-
-/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
-
-static const struct cpumask *x2apic_target_cpus(void)
-{
-       return cpumask_of(0);
-}
-
-/*
- * For now, each logical cpu is in its own vector allocation domain.
- */
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-       cpumask_clear(retmask);
-       cpumask_set_cpu(cpu, retmask);
-}
-
-static void
-__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
-{
-       unsigned long cfg;
-
-       cfg = __prepare_ICR(0, vector, dest);
-
-       /*
-        * send the IPI.
-        */
-       native_x2apic_icr_write(cfg, apicid);
-}
-
-/*
- * For now, we send the IPIs one by one in the cpumask.
- * TBD: based on the cpu mask, we can send the IPIs to the cluster group
- * at once. We have 16 CPUs in a cluster. This will minimize IPI register
- * writes.
- */
-static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
-{
-       unsigned long query_cpu;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask) {
-               __x2apic_send_IPI_dest(
-                       per_cpu(x86_cpu_to_logical_apicid, query_cpu),
-                       vector, apic->dest_logical);
-       }
-       local_irq_restore(flags);
-}
-
-static void
-x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
-{
-       unsigned long this_cpu = smp_processor_id();
-       unsigned long query_cpu;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask) {
-               if (query_cpu == this_cpu)
-                       continue;
-               __x2apic_send_IPI_dest(
-                               per_cpu(x86_cpu_to_logical_apicid, query_cpu),
-                               vector, apic->dest_logical);
-       }
-       local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_allbutself(int vector)
-{
-       unsigned long this_cpu = smp_processor_id();
-       unsigned long query_cpu;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       for_each_online_cpu(query_cpu) {
-               if (query_cpu == this_cpu)
-                       continue;
-               __x2apic_send_IPI_dest(
-                               per_cpu(x86_cpu_to_logical_apicid, query_cpu),
-                               vector, apic->dest_logical);
-       }
-       local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_all(int vector)
-{
-       x2apic_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int x2apic_apic_id_registered(void)
-{
-       return 1;
-}
-
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-       /*
-        * We're using fixed IRQ delivery, can only return one logical APIC ID.
-        * May as well be the first.
-        */
-       int cpu = cpumask_first(cpumask);
-
-       if ((unsigned)cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_logical_apicid, cpu);
-       else
-               return BAD_APICID;
-}
-
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                             const struct cpumask *andmask)
-{
-       int cpu;
-
-       /*
-        * We're using fixed IRQ delivery, can only return one logical APIC ID.
-        * May as well be the first.
-        */
-       for_each_cpu_and(cpu, cpumask, andmask) {
-               if (cpumask_test_cpu(cpu, cpu_online_mask))
-                       break;
-       }
-
-       if (cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_logical_apicid, cpu);
-
-       return BAD_APICID;
-}
-
-static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
-{
-       unsigned int id;
-
-       id = x;
-       return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
-       unsigned long x;
-
-       x = id;
-       return x;
-}
-
-static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
-{
-       return current_cpu_data.initial_apicid >> index_msb;
-}
-
-static void x2apic_send_IPI_self(int vector)
-{
-       apic_write(APIC_SELF_IPI, vector);
-}
-
-static void init_x2apic_ldr(void)
-{
-       int cpu = smp_processor_id();
-
-       per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
-}
-
-struct apic apic_x2apic_cluster = {
-
-       .name                           = "cluster x2apic",
-       .probe                          = NULL,
-       .acpi_madt_oem_check            = x2apic_acpi_madt_oem_check,
-       .apic_id_registered             = x2apic_apic_id_registered,
-
-       .irq_delivery_mode              = dest_LowestPrio,
-       .irq_dest_mode                  = 1, /* logical */
-
-       .target_cpus                    = x2apic_target_cpus,
-       .disable_esr                    = 0,
-       .dest_logical                   = APIC_DEST_LOGICAL,
-       .check_apicid_used              = NULL,
-       .check_apicid_present           = NULL,
-
-       .vector_allocation_domain       = x2apic_vector_allocation_domain,
-       .init_apic_ldr                  = init_x2apic_ldr,
-
-       .ioapic_phys_id_map             = NULL,
-       .setup_apic_routing             = NULL,
-       .multi_timer_check              = NULL,
-       .apicid_to_node                 = NULL,
-       .cpu_to_logical_apicid          = NULL,
-       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
-       .apicid_to_cpu_present          = NULL,
-       .setup_portio_remap             = NULL,
-       .check_phys_apicid_present      = default_check_phys_apicid_present,
-       .enable_apic_mode               = NULL,
-       .phys_pkg_id                    = x2apic_cluster_phys_pkg_id,
-       .mps_oem_check                  = NULL,
-
-       .get_apic_id                    = x2apic_cluster_phys_get_apic_id,
-       .set_apic_id                    = set_apic_id,
-       .apic_id_mask                   = 0xFFFFFFFFu,
-
-       .cpu_mask_to_apicid             = x2apic_cpu_mask_to_apicid,
-       .cpu_mask_to_apicid_and         = x2apic_cpu_mask_to_apicid_and,
-
-       .send_IPI_mask                  = x2apic_send_IPI_mask,
-       .send_IPI_mask_allbutself       = x2apic_send_IPI_mask_allbutself,
-       .send_IPI_allbutself            = x2apic_send_IPI_allbutself,
-       .send_IPI_all                   = x2apic_send_IPI_all,
-       .send_IPI_self                  = x2apic_send_IPI_self,
-
-       .wakeup_cpu                     = NULL,
-       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
-       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-       .wait_for_init_deassert         = NULL,
-       .smp_callin_clear_local_apic    = NULL,
-       .inquire_remote_apic            = NULL,
-
-       .read                           = native_apic_msr_read,
-       .write                          = native_apic_msr_write,
-       .icr_read                       = native_x2apic_icr_read,
-       .icr_write                      = native_x2apic_icr_write,
-       .wait_icr_idle                  = native_x2apic_wait_icr_idle,
-       .safe_wait_icr_idle             = native_safe_x2apic_wait_icr_idle,
-};
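Unlike the legacy xAPIC, the x2APIC ICR is a single 64-bit MSR write with the destination APIC ID in the upper half, which is what keeps the per-cpu IPI loops above cheap. A sketch of the packing; the vector value is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* Destination APIC ID in bits 63:32, command fields (vector,
     * delivery mode, ...) in the low half. */
    static uint64_t icr_value(uint32_t apicid, uint32_t cfg)
    {
            return ((uint64_t)apicid << 32) | cfg;
    }

    int main(void)
    {
            uint32_t cfg = 0xfd;    /* fixed delivery, vector 0xfd */

            printf("ICR for apicid 42: 0x%016llx\n",
                   (unsigned long long)icr_value(42, cfg));
            return 0;
    }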
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
deleted file mode 100644 (file)
index d2d52eb..0000000
+++ /dev/null
@@ -1,229 +0,0 @@
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/dmar.h>
-
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-
-static int x2apic_phys;
-
-static int set_x2apic_phys_mode(char *arg)
-{
-       x2apic_phys = 1;
-       return 0;
-}
-early_param("x2apic_phys", set_x2apic_phys_mode);
-
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-       if (cpu_has_x2apic && x2apic_phys)
-               return 1;
-
-       return 0;
-}
-
-/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
-
-static const struct cpumask *x2apic_target_cpus(void)
-{
-       return cpumask_of(0);
-}
-
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-       cpumask_clear(retmask);
-       cpumask_set_cpu(cpu, retmask);
-}
-
-static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
-                                  unsigned int dest)
-{
-       unsigned long cfg;
-
-       cfg = __prepare_ICR(0, vector, dest);
-
-       /*
-        * send the IPI.
-        */
-       native_x2apic_icr_write(cfg, apicid);
-}
-
-static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
-{
-       unsigned long query_cpu;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask) {
-               __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
-                                      vector, APIC_DEST_PHYSICAL);
-       }
-       local_irq_restore(flags);
-}
-
-static void
-x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
-{
-       unsigned long this_cpu = smp_processor_id();
-       unsigned long query_cpu;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask) {
-               if (query_cpu != this_cpu)
-                       __x2apic_send_IPI_dest(
-                               per_cpu(x86_cpu_to_apicid, query_cpu),
-                               vector, APIC_DEST_PHYSICAL);
-       }
-       local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_allbutself(int vector)
-{
-       unsigned long this_cpu = smp_processor_id();
-       unsigned long query_cpu;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       for_each_online_cpu(query_cpu) {
-               if (query_cpu == this_cpu)
-                       continue;
-               __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
-                                      vector, APIC_DEST_PHYSICAL);
-       }
-       local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_all(int vector)
-{
-       x2apic_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int x2apic_apic_id_registered(void)
-{
-       return 1;
-}
-
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-       /*
-        * We're using fixed IRQ delivery, can only return one phys APIC ID.
-        * May as well be the first.
-        */
-       int cpu = cpumask_first(cpumask);
-
-       if ((unsigned)cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_apicid, cpu);
-       else
-               return BAD_APICID;
-}
-
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                             const struct cpumask *andmask)
-{
-       int cpu;
-
-       /*
-        * We're using fixed IRQ delivery, can only return one phys APIC ID.
-        * May as well be the first.
-        */
-       for_each_cpu_and(cpu, cpumask, andmask) {
-               if (cpumask_test_cpu(cpu, cpu_online_mask))
-                       break;
-       }
-
-       if (cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_apicid, cpu);
-
-       return BAD_APICID;
-}
-
-static unsigned int x2apic_phys_get_apic_id(unsigned long x)
-{
-       return x;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
-       return id;
-}
-
-static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
-{
-       return current_cpu_data.initial_apicid >> index_msb;
-}
-
-static void x2apic_send_IPI_self(int vector)
-{
-       apic_write(APIC_SELF_IPI, vector);
-}
-
-static void init_x2apic_ldr(void)
-{
-}
-
-struct apic apic_x2apic_phys = {
-
-       .name                           = "physical x2apic",
-       .probe                          = NULL,
-       .acpi_madt_oem_check            = x2apic_acpi_madt_oem_check,
-       .apic_id_registered             = x2apic_apic_id_registered,
-
-       .irq_delivery_mode              = dest_Fixed,
-       .irq_dest_mode                  = 0, /* physical */
-
-       .target_cpus                    = x2apic_target_cpus,
-       .disable_esr                    = 0,
-       .dest_logical                   = 0,
-       .check_apicid_used              = NULL,
-       .check_apicid_present           = NULL,
-
-       .vector_allocation_domain       = x2apic_vector_allocation_domain,
-       .init_apic_ldr                  = init_x2apic_ldr,
-
-       .ioapic_phys_id_map             = NULL,
-       .setup_apic_routing             = NULL,
-       .multi_timer_check              = NULL,
-       .apicid_to_node                 = NULL,
-       .cpu_to_logical_apicid          = NULL,
-       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
-       .apicid_to_cpu_present          = NULL,
-       .setup_portio_remap             = NULL,
-       .check_phys_apicid_present      = default_check_phys_apicid_present,
-       .enable_apic_mode               = NULL,
-       .phys_pkg_id                    = x2apic_phys_pkg_id,
-       .mps_oem_check                  = NULL,
-
-       .get_apic_id                    = x2apic_phys_get_apic_id,
-       .set_apic_id                    = set_apic_id,
-       .apic_id_mask                   = 0xFFFFFFFFu,
-
-       .cpu_mask_to_apicid             = x2apic_cpu_mask_to_apicid,
-       .cpu_mask_to_apicid_and         = x2apic_cpu_mask_to_apicid_and,
-
-       .send_IPI_mask                  = x2apic_send_IPI_mask,
-       .send_IPI_mask_allbutself       = x2apic_send_IPI_mask_allbutself,
-       .send_IPI_allbutself            = x2apic_send_IPI_allbutself,
-       .send_IPI_all                   = x2apic_send_IPI_all,
-       .send_IPI_self                  = x2apic_send_IPI_self,
-
-       .wakeup_cpu                     = NULL,
-       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
-       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-       .wait_for_init_deassert         = NULL,
-       .smp_callin_clear_local_apic    = NULL,
-       .inquire_remote_apic            = NULL,
-
-       .read                           = native_apic_msr_read,
-       .write                          = native_apic_msr_write,
-       .icr_read                       = native_x2apic_icr_read,
-       .icr_write                      = native_x2apic_icr_write,
-       .wait_icr_idle                  = native_x2apic_wait_icr_idle,
-       .safe_wait_icr_idle             = native_safe_x2apic_wait_icr_idle,
-};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
deleted file mode 100644 (file)
index 20b4ad0..0000000
+++ /dev/null
@@ -1,643 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * SGI UV APIC functions (note: not an Intel-compatible APIC)
- *
- * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
- */
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/hardirq.h>
-#include <linux/timer.h>
-#include <linux/proc_fs.h>
-#include <asm/current.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/pgtable.h>
-#include <asm/uv/uv.h>
-#include <asm/uv/uv_mmrs.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/bios.h>
-
-DEFINE_PER_CPU(int, x2apic_extra_bits);
-
-static enum uv_system_type uv_system_type;
-
-static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-       if (!strcmp(oem_id, "SGI")) {
-               if (!strcmp(oem_table_id, "UVL"))
-                       uv_system_type = UV_LEGACY_APIC;
-               else if (!strcmp(oem_table_id, "UVX"))
-                       uv_system_type = UV_X2APIC;
-               else if (!strcmp(oem_table_id, "UVH")) {
-                       uv_system_type = UV_NON_UNIQUE_APIC;
-                       return 1;
-               }
-       }
-       return 0;
-}
-
-enum uv_system_type get_uv_system_type(void)
-{
-       return uv_system_type;
-}
-
-int is_uv_system(void)
-{
-       return uv_system_type != UV_NONE;
-}
-EXPORT_SYMBOL_GPL(is_uv_system);
-
-DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
-EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
-
-struct uv_blade_info *uv_blade_info;
-EXPORT_SYMBOL_GPL(uv_blade_info);
-
-short *uv_node_to_blade;
-EXPORT_SYMBOL_GPL(uv_node_to_blade);
-
-short *uv_cpu_to_blade;
-EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
-
-short uv_possible_blades;
-EXPORT_SYMBOL_GPL(uv_possible_blades);
-
-unsigned long sn_rtc_cycles_per_second;
-EXPORT_SYMBOL(sn_rtc_cycles_per_second);
-
-/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
-
-static const struct cpumask *uv_target_cpus(void)
-{
-       return cpumask_of(0);
-}
-
-static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-       cpumask_clear(retmask);
-       cpumask_set_cpu(cpu, retmask);
-}
-
-int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
-{
-       unsigned long val;
-       int pnode;
-
-       pnode = uv_apicid_to_pnode(phys_apicid);
-       val = (1UL << UVH_IPI_INT_SEND_SHFT) |
-           (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
-           (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
-           APIC_DM_INIT;
-       uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-       mdelay(10);
-
-       val = (1UL << UVH_IPI_INT_SEND_SHFT) |
-           (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
-           (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
-           APIC_DM_STARTUP;
-       uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-       return 0;
-}
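uv_wakeup_secondary() builds the INIT and then the STARTUP message as a single 64-bit UVH_IPI_INT word. A sketch of the field packing; the *_SHFT constants below are assumptions standing in for the generated ones in asm/uv/uv_mmrs.h:

    #include <stdint.h>
    #include <stdio.h>

    #define UVH_IPI_INT_VECTOR_SHFT   0    /* assumed */
    #define UVH_IPI_INT_APIC_ID_SHFT 16    /* assumed */
    #define UVH_IPI_INT_SEND_SHFT    63    /* assumed */

    static uint64_t ipi_int_word(uint64_t apicid, uint64_t vector)
    {
            return (1ULL << UVH_IPI_INT_SEND_SHFT) |
                   (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
                   (vector << UVH_IPI_INT_VECTOR_SHFT);
    }

    int main(void)
    {
            printf("UVH_IPI_INT word: 0x%016llx\n",
                   (unsigned long long)ipi_int_word(0x20, 0xf3));
            return 0;
    }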
-
-static void uv_send_IPI_one(int cpu, int vector)
-{
-       unsigned long val, apicid;
-       int pnode;
-
-       apicid = per_cpu(x86_cpu_to_apicid, cpu);
-       pnode = uv_apicid_to_pnode(apicid);
-
-       val = (1UL << UVH_IPI_INT_SEND_SHFT) |
-             (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
-             (vector << UVH_IPI_INT_VECTOR_SHFT);
-
-       uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-}
-
-static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
-{
-       unsigned int cpu;
-
-       for_each_cpu(cpu, mask)
-               uv_send_IPI_one(cpu, vector);
-}
-
-static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
-{
-       unsigned int this_cpu = smp_processor_id();
-       unsigned int cpu;
-
-       for_each_cpu(cpu, mask) {
-               if (cpu != this_cpu)
-                       uv_send_IPI_one(cpu, vector);
-       }
-}
-
-static void uv_send_IPI_allbutself(int vector)
-{
-       unsigned int this_cpu = smp_processor_id();
-       unsigned int cpu;
-
-       for_each_online_cpu(cpu) {
-               if (cpu != this_cpu)
-                       uv_send_IPI_one(cpu, vector);
-       }
-}
-
-static void uv_send_IPI_all(int vector)
-{
-       uv_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int uv_apic_id_registered(void)
-{
-       return 1;
-}
-
-static void uv_init_apic_ldr(void)
-{
-}
-
-static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-       /*
-        * We're using fixed IRQ delivery, can only return one phys APIC ID.
-        * May as well be the first.
-        */
-       int cpu = cpumask_first(cpumask);
-
-       if ((unsigned)cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_apicid, cpu);
-       else
-               return BAD_APICID;
-}
-
-static unsigned int
-uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                         const struct cpumask *andmask)
-{
-       int cpu;
-
-       /*
-        * We're using fixed IRQ delivery, can only return one phys APIC ID.
-        * May as well be the first.
-        */
-       for_each_cpu_and(cpu, cpumask, andmask) {
-               if (cpumask_test_cpu(cpu, cpu_online_mask))
-                       break;
-       }
-       if (cpu < nr_cpu_ids)
-               return per_cpu(x86_cpu_to_apicid, cpu);
-
-       return BAD_APICID;
-}
-
-static unsigned int x2apic_get_apic_id(unsigned long x)
-{
-       unsigned int id;
-
-       WARN_ON(preemptible() && num_online_cpus() > 1);
-       id = x | __get_cpu_var(x2apic_extra_bits);
-
-       return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
-       unsigned long x;
-
-       /* mask out x2apic_extra_bits? */
-       x = id;
-       return x;
-}
-
-static unsigned int uv_read_apic_id(void)
-{
-       return x2apic_get_apic_id(apic_read(APIC_ID));
-}
-
-static int uv_phys_pkg_id(int initial_apicid, int index_msb)
-{
-       return uv_read_apic_id() >> index_msb;
-}
-
-static void uv_send_IPI_self(int vector)
-{
-       apic_write(APIC_SELF_IPI, vector);
-}
-
-struct apic apic_x2apic_uv_x = {
-
-       .name                           = "UV large system",
-       .probe                          = NULL,
-       .acpi_madt_oem_check            = uv_acpi_madt_oem_check,
-       .apic_id_registered             = uv_apic_id_registered,
-
-       .irq_delivery_mode              = dest_Fixed,
-       .irq_dest_mode                  = 1, /* logical */
-
-       .target_cpus                    = uv_target_cpus,
-       .disable_esr                    = 0,
-       .dest_logical                   = APIC_DEST_LOGICAL,
-       .check_apicid_used              = NULL,
-       .check_apicid_present           = NULL,
-
-       .vector_allocation_domain       = uv_vector_allocation_domain,
-       .init_apic_ldr                  = uv_init_apic_ldr,
-
-       .ioapic_phys_id_map             = NULL,
-       .setup_apic_routing             = NULL,
-       .multi_timer_check              = NULL,
-       .apicid_to_node                 = NULL,
-       .cpu_to_logical_apicid          = NULL,
-       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
-       .apicid_to_cpu_present          = NULL,
-       .setup_portio_remap             = NULL,
-       .check_phys_apicid_present      = default_check_phys_apicid_present,
-       .enable_apic_mode               = NULL,
-       .phys_pkg_id                    = uv_phys_pkg_id,
-       .mps_oem_check                  = NULL,
-
-       .get_apic_id                    = x2apic_get_apic_id,
-       .set_apic_id                    = set_apic_id,
-       .apic_id_mask                   = 0xFFFFFFFFu,
-
-       .cpu_mask_to_apicid             = uv_cpu_mask_to_apicid,
-       .cpu_mask_to_apicid_and         = uv_cpu_mask_to_apicid_and,
-
-       .send_IPI_mask                  = uv_send_IPI_mask,
-       .send_IPI_mask_allbutself       = uv_send_IPI_mask_allbutself,
-       .send_IPI_allbutself            = uv_send_IPI_allbutself,
-       .send_IPI_all                   = uv_send_IPI_all,
-       .send_IPI_self                  = uv_send_IPI_self,
-
-       .wakeup_cpu                     = NULL,
-       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
-       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-       .wait_for_init_deassert         = NULL,
-       .smp_callin_clear_local_apic    = NULL,
-       .inquire_remote_apic            = NULL,
-
-       .read                           = native_apic_msr_read,
-       .write                          = native_apic_msr_write,
-       .icr_read                       = native_x2apic_icr_read,
-       .icr_write                      = native_x2apic_icr_write,
-       .wait_icr_idle                  = native_x2apic_wait_icr_idle,
-       .safe_wait_icr_idle             = native_safe_x2apic_wait_icr_idle,
-};
-
-static __cpuinit void set_x2apic_extra_bits(int pnode)
-{
-       __get_cpu_var(x2apic_extra_bits) = (pnode << 6);
-}
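In UV_NON_UNIQUE_APIC mode, the pnode stored above supplies the high bits of every APIC ID read on that hub (see x2apic_get_apic_id() earlier in this file). A small sketch of the composition; the sample IDs are made up:

    #include <stdio.h>

    int main(void)
    {
            unsigned int pnode    = 5;      /* hub's pnode */
            unsigned int local_id = 0x2a;   /* ID as read locally */

            /* Mirrors the "pnode << 6" above. */
            printf("effective apic id: 0x%x\n", (pnode << 6) | local_id);
            return 0;
    }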
-
-/*
- * Called on boot cpu.
- */
-static __init int boot_pnode_to_blade(int pnode)
-{
-       int blade;
-
-       for (blade = 0; blade < uv_num_possible_blades(); blade++)
-               if (pnode == uv_blade_info[blade].pnode)
-                       return blade;
-       BUG();
-}
-
-struct redir_addr {
-       unsigned long redirect;
-       unsigned long alias;
-};
-
-#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
-
-static __initdata struct redir_addr redir_addrs[] = {
-       {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
-       {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
-       {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
-};
-
-static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
-{
-       union uvh_si_alias0_overlay_config_u alias;
-       union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
-               alias.v = uv_read_local_mmr(redir_addrs[i].alias);
-               if (alias.s.base == 0) {
-                       *size = (1UL << alias.s.m_alias);
-                       redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
-                       *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
-                       return;
-               }
-       }
-       BUG();
-}
-
-static __init void map_low_mmrs(void)
-{
-       init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
-       init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
-}
-
-enum map_type {map_wb, map_uc};
-
-static __init void map_high(char *id, unsigned long base, int shift,
-                           int max_pnode, enum map_type map_type)
-{
-       unsigned long bytes, paddr;
-
-       paddr = base << shift;
-       bytes = (1UL << shift) * (max_pnode + 1);
-       printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
-                                               paddr + bytes);
-       if (map_type == map_uc)
-               init_extra_mapping_uc(paddr, bytes);
-       else
-               init_extra_mapping_wb(paddr, bytes);
-
-}
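map_high() turns a base field and a shift into a physical range wide enough to cover every pnode up to max_pnode. The same arithmetic with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            int shift = 28, max_pnode = 3;
            unsigned long base = 0x3;       /* illustrative base field */

            unsigned long paddr = base << shift;
            unsigned long bytes = (1UL << shift) * (max_pnode + 1);

            printf("UV: Map GRU_HI 0x%lx - 0x%lx\n", paddr, paddr + bytes);
            return 0;
    }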
-static __init void map_gru_high(int max_pnode)
-{
-       union uvh_rh_gam_gru_overlay_config_mmr_u gru;
-       int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
-
-       gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
-       if (gru.s.enable)
-               map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
-}
-
-static __init void map_config_high(int max_pnode)
-{
-       union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
-       int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
-
-       cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
-       if (cfg.s.enable)
-               map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
-}
-
-static __init void map_mmr_high(int max_pnode)
-{
-       union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
-       int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
-
-       mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
-       if (mmr.s.enable)
-               map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
-}
-
-static __init void map_mmioh_high(int max_pnode)
-{
-       union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
-       int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
-
-       mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
-       if (mmioh.s.enable)
-               map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
-}
-
-static __init void uv_rtc_init(void)
-{
-       long status;
-       u64 ticks_per_sec;
-
-       status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
-                                       &ticks_per_sec);
-       if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
-               printk(KERN_WARNING
-                       "unable to determine platform RTC clock frequency, "
-                       "guessing.\n");
-               /* BIOS gives a wrong value for the clock freq., so guess */
-               sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
-       } else
-               sn_rtc_cycles_per_second = ticks_per_sec;
-}
-
-/*
- * percpu heartbeat timer
- */
-static void uv_heartbeat(unsigned long ignored)
-{
-       struct timer_list *timer = &uv_hub_info->scir.timer;
-       unsigned char bits = uv_hub_info->scir.state;
-
-       /* flip heartbeat bit */
-       bits ^= SCIR_CPU_HEARTBEAT;
-
-       /* is this cpu idle? */
-       if (idle_cpu(raw_smp_processor_id()))
-               bits &= ~SCIR_CPU_ACTIVITY;
-       else
-               bits |= SCIR_CPU_ACTIVITY;
-
-       /* update system controller interface reg */
-       uv_set_scir_bits(bits);
-
-       /* enable next timer period */
-       mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
-}
-
-static void __cpuinit uv_heartbeat_enable(int cpu)
-{
-       if (!uv_cpu_hub_info(cpu)->scir.enabled) {
-               struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
-
-               uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
-               setup_timer(timer, uv_heartbeat, cpu);
-               timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
-               add_timer_on(timer, cpu);
-               uv_cpu_hub_info(cpu)->scir.enabled = 1;
-       }
-
-       /* check boot cpu */
-       if (!uv_cpu_hub_info(0)->scir.enabled)
-               uv_heartbeat_enable(0);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static void __cpuinit uv_heartbeat_disable(int cpu)
-{
-       if (uv_cpu_hub_info(cpu)->scir.enabled) {
-               uv_cpu_hub_info(cpu)->scir.enabled = 0;
-               del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
-       }
-       uv_set_cpu_scir_bits(cpu, 0xff);
-}
-
-/*
- * cpu hotplug notifier
- */
-static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
-                                      unsigned long action, void *hcpu)
-{
-       long cpu = (long)hcpu;
-
-       switch (action) {
-       case CPU_ONLINE:
-               uv_heartbeat_enable(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               uv_heartbeat_disable(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static __init void uv_scir_register_cpu_notifier(void)
-{
-       hotcpu_notifier(uv_scir_cpu_notify, 0);
-}
-
-#else /* !CONFIG_HOTPLUG_CPU */
-
-static __init void uv_scir_register_cpu_notifier(void)
-{
-}
-
-static __init int uv_init_heartbeat(void)
-{
-       int cpu;
-
-       if (is_uv_system())
-               for_each_online_cpu(cpu)
-                       uv_heartbeat_enable(cpu);
-       return 0;
-}
-
-late_initcall(uv_init_heartbeat);
-
-#endif /* !CONFIG_HOTPLUG_CPU */
-
-/*
- * Called on each cpu to initialize the per_cpu UV data area.
- *     ZZZ hotplug not supported yet
- */
-void __cpuinit uv_cpu_init(void)
-{
-       /* CPU 0 initialization will be done via uv_system_init. */
-       if (!uv_blade_info)
-               return;
-
-       uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
-
-       if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
-               set_x2apic_extra_bits(uv_hub_info->pnode);
-}
-
-
-void __init uv_system_init(void)
-{
-       union uvh_si_addr_map_config_u m_n_config;
-       union uvh_node_id_u node_id;
-       unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
-       int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
-       int max_pnode = 0;
-       unsigned long mmr_base, present;
-
-       map_low_mmrs();
-
-       m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
-       m_val = m_n_config.s.m_skt;
-       n_val = m_n_config.s.n_skt;
-       mmr_base =
-           uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
-           ~UV_MMR_ENABLE;
-       printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
-
-       for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
-               uv_possible_blades +=
-                 hweight64(uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8));
-       printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
-
-       bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-       uv_blade_info = kmalloc(bytes, GFP_KERNEL);
-
-       get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
-
-       bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
-       uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
-       memset(uv_node_to_blade, 255, bytes);
-
-       bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
-       uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
-       memset(uv_cpu_to_blade, 255, bytes);
-
-       blade = 0;
-       for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
-               present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
-               for (j = 0; j < 64; j++) {
-                       if (!test_bit(j, &present))
-                               continue;
-                       uv_blade_info[blade].pnode = (i * 64 + j);
-                       uv_blade_info[blade].nr_possible_cpus = 0;
-                       uv_blade_info[blade].nr_online_cpus = 0;
-                       blade++;
-               }
-       }
-
-       node_id.v = uv_read_local_mmr(UVH_NODE_ID);
-       gnode_upper = (((unsigned long)node_id.s.node_id) &
-                      ~((1 << n_val) - 1)) << m_val;
-
-       uv_bios_init();
-       uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
-                           &sn_coherency_id, &sn_region_size);
-       uv_rtc_init();
-
-       for_each_present_cpu(cpu) {
-               nid = cpu_to_node(cpu);
-               pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
-               blade = boot_pnode_to_blade(pnode);
-               lcpu = uv_blade_info[blade].nr_possible_cpus;
-               uv_blade_info[blade].nr_possible_cpus++;
-
-               uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
-               uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
-               uv_cpu_hub_info(cpu)->m_val = m_val;
-               uv_cpu_hub_info(cpu)->n_val = n_val;
-               uv_cpu_hub_info(cpu)->numa_blade_id = blade;
-               uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
-               uv_cpu_hub_info(cpu)->pnode = pnode;
-               uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
-               uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
-               uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
-               uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-               uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
-               uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
-               uv_node_to_blade[nid] = blade;
-               uv_cpu_to_blade[cpu] = blade;
-               max_pnode = max(pnode, max_pnode);
-
-               printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
-                       "lcpu %d, blade %d\n",
-                       cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
-                       lcpu, blade);
-       }
-
-       map_gru_high(max_pnode);
-       map_mmr_high(max_pnode);
-       map_config_high(max_pnode);
-       map_mmioh_high(max_pnode);
-
-       uv_cpu_init();
-       uv_scir_register_cpu_notifier();
-       proc_mkdir("sgi_uv", NULL);
-}
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
deleted file mode 100644 (file)
index 00e6071..0000000
+++ /dev/null
@@ -1,4160 +0,0 @@
-/*
- *     Intel IO-APIC support for multi-Pentium hosts.
- *
- *     Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
- *
- *     Many thanks to Stig Venaas for trying out countless experimental
- *     patches and reporting/debugging problems patiently!
- *
- *     (c) 1999, Multiple IO-APIC support, developed by
- *     Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- *     further tested and cleaned up by Zach Brown <zab@redhat.com>
- *     and Ingo Molnar <mingo@redhat.com>
- *
- *     Fixes
- *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
- *                                     thanks to Eric Gilmore
- *                                     and Rolf G. Tews
- *                                     for testing these extensively
- *     Paul Diefenbaugh        :       Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/mc146818rtc.h>
-#include <linux/compiler.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/jiffies.h>     /* time_after() */
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-#include <linux/bootmem.h>
-#include <linux/dmar.h>
-#include <linux/hpet.h>
-
-#include <asm/idle.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/cpu.h>
-#include <asm/desc.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-#include <asm/dma.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-#include <asm/nmi.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
-#include <asm/setup.h>
-#include <asm/irq_remapping.h>
-#include <asm/hpet.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>
-
-#include <asm/apic.h>
-
-#define __apicdebuginit(type) static type __init
-
-/*
- *      Is the SiS APIC rmw bug present ?
- *      -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-#if defined(CONFIG_MCA) || defined(CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-int skip_ioapic_setup;
-
-void arch_disable_smp_support(void)
-{
-#ifdef CONFIG_PCI
-       noioapicquirk = 1;
-       noioapicreroute = -1;
-#endif
-       skip_ioapic_setup = 1;
-}
-
-static int __init parse_noapic(char *str)
-{
-       /* disable IO-APIC */
-       arch_disable_smp_support();
-       return 0;
-}
-early_param("noapic", parse_noapic);
-
-struct irq_pin_list;
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-struct irq_pin_list {
-       int apic, pin;
-       struct irq_pin_list *next;
-};
-
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
-{
-       struct irq_pin_list *pin;
-       int node;
-
-       node = cpu_to_node(cpu);
-
-       pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
-
-       return pin;
-}
-
-struct irq_cfg {
-       struct irq_pin_list *irq_2_pin;
-       cpumask_var_t domain;
-       cpumask_var_t old_domain;
-       unsigned move_cleanup_count;
-       u8 vector;
-       u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-       u8 move_desc_pending : 1;
-#endif
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg irq_cfgx[] = {
-#else
-static struct irq_cfg irq_cfgx[NR_IRQS] = {
-#endif
-       [0]  = { .vector = IRQ0_VECTOR,  },
-       [1]  = { .vector = IRQ1_VECTOR,  },
-       [2]  = { .vector = IRQ2_VECTOR,  },
-       [3]  = { .vector = IRQ3_VECTOR,  },
-       [4]  = { .vector = IRQ4_VECTOR,  },
-       [5]  = { .vector = IRQ5_VECTOR,  },
-       [6]  = { .vector = IRQ6_VECTOR,  },
-       [7]  = { .vector = IRQ7_VECTOR,  },
-       [8]  = { .vector = IRQ8_VECTOR,  },
-       [9]  = { .vector = IRQ9_VECTOR,  },
-       [10] = { .vector = IRQ10_VECTOR, },
-       [11] = { .vector = IRQ11_VECTOR, },
-       [12] = { .vector = IRQ12_VECTOR, },
-       [13] = { .vector = IRQ13_VECTOR, },
-       [14] = { .vector = IRQ14_VECTOR, },
-       [15] = { .vector = IRQ15_VECTOR, },
-};
-
-int __init arch_early_irq_init(void)
-{
-       struct irq_cfg *cfg;
-       struct irq_desc *desc;
-       int count;
-       int i;
-
-       cfg = irq_cfgx;
-       count = ARRAY_SIZE(irq_cfgx);
-
-       for (i = 0; i < count; i++) {
-               desc = irq_to_desc(i);
-               desc->chip_data = &cfg[i];
-               alloc_bootmem_cpumask_var(&cfg[i].domain);
-               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
-               if (i < NR_IRQS_LEGACY)
-                       cpumask_setall(cfg[i].domain);
-       }
-
-       return 0;
-}
-
-#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-       struct irq_cfg *cfg = NULL;
-       struct irq_desc *desc;
-
-       desc = irq_to_desc(irq);
-       if (desc)
-               cfg = desc->chip_data;
-
-       return cfg;
-}
-
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
-{
-       struct irq_cfg *cfg;
-       int node;
-
-       node = cpu_to_node(cpu);
-
-       cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
-       if (cfg) {
-               if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
-                       kfree(cfg);
-                       cfg = NULL;
-               } else if (!alloc_cpumask_var_node(&cfg->old_domain,
-                                                         GFP_ATOMIC, node)) {
-                       free_cpumask_var(cfg->domain);
-                       kfree(cfg);
-                       cfg = NULL;
-               } else {
-                       cpumask_clear(cfg->domain);
-                       cpumask_clear(cfg->old_domain);
-               }
-       }
-
-       return cfg;
-}
-
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
-{
-       struct irq_cfg *cfg;
-
-       cfg = desc->chip_data;
-       if (!cfg) {
-               desc->chip_data = get_one_free_irq_cfg(cpu);
-               if (!desc->chip_data) {
-                       printk(KERN_ERR "can not alloc irq_cfg\n");
-                       BUG_ON(1);
-               }
-       }
-
-       return 0;
-}
-
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
-static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
-{
-       struct irq_pin_list *old_entry, *head, *tail, *entry;
-
-       cfg->irq_2_pin = NULL;
-       old_entry = old_cfg->irq_2_pin;
-       if (!old_entry)
-               return;
-
-       entry = get_one_free_irq_2_pin(cpu);
-       if (!entry)
-               return;
-
-       entry->apic     = old_entry->apic;
-       entry->pin      = old_entry->pin;
-       head            = entry;
-       tail            = entry;
-       old_entry       = old_entry->next;
-       while (old_entry) {
-               entry = get_one_free_irq_2_pin(cpu);
-               if (!entry) {
-                       entry = head;
-                       while (entry) {
-                               head = entry->next;
-                               kfree(entry);
-                               entry = head;
-                       }
-                       /* still use the old one */
-                       return;
-               }
-               entry->apic     = old_entry->apic;
-               entry->pin      = old_entry->pin;
-               tail->next      = entry;
-               tail            = entry;
-               old_entry       = old_entry->next;
-       }
-
-       tail->next = NULL;
-       cfg->irq_2_pin = head;
-}
-
-static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
-{
-       struct irq_pin_list *entry, *next;
-
-       if (old_cfg->irq_2_pin == cfg->irq_2_pin)
-               return;
-
-       entry = old_cfg->irq_2_pin;
-
-       while (entry) {
-               next = entry->next;
-               kfree(entry);
-               entry = next;
-       }
-       old_cfg->irq_2_pin = NULL;
-}
-
-void arch_init_copy_chip_data(struct irq_desc *old_desc,
-                                struct irq_desc *desc, int cpu)
-{
-       struct irq_cfg *cfg;
-       struct irq_cfg *old_cfg;
-
-       cfg = get_one_free_irq_cfg(cpu);
-
-       if (!cfg)
-               return;
-
-       desc->chip_data = cfg;
-
-       old_cfg = old_desc->chip_data;
-
-       memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
-
-       init_copy_irq_2_pin(old_cfg, cfg, cpu);
-}
-
-static void free_irq_cfg(struct irq_cfg *old_cfg)
-{
-       kfree(old_cfg);
-}
-
-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
-{
-       struct irq_cfg *old_cfg, *cfg;
-
-       old_cfg = old_desc->chip_data;
-       cfg = desc->chip_data;
-
-       if (old_cfg == cfg)
-               return;
-
-       if (old_cfg) {
-               free_irq_2_pin(old_cfg, cfg);
-               free_irq_cfg(old_cfg);
-               old_desc->chip_data = NULL;
-       }
-}
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg = desc->chip_data;
-
-       if (!cfg->move_in_progress) {
-               /* it means that domain is not changed */
-               if (!cpumask_intersects(desc->affinity, mask))
-                       cfg->move_desc_pending = 1;
-       }
-}
-#endif
-
-#else
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-       return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
-
-#endif
-
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
-struct io_apic {
-       unsigned int index;
-       unsigned int unused[3];
-       unsigned int data;
-};
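-
-/*
- * The IO-APIC is programmed indirectly: the register number is written
- * to the index register at offset 0x00, and the value is then moved
- * through the data window at offset 0x10 (hence the three unused words
- * in struct io_apic above).
- */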
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
-       return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-               + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       writel(reg, &io_apic->index);
-       return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       writel(reg, &io_apic->index);
-       writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-
-       if (sis_apic_bug)
-               writel(reg, &io_apic->index);
-       writel(value, &io_apic->data);
-}
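-
-/*
- * Typical read-modify-write usage (a sketch, mirroring io_apic_modify_irq()
- * further below):
- *
- *     reg = io_apic_read(apic, 0x10 + pin * 2);
- *     reg |= IO_APIC_REDIR_MASKED;
- *     io_apic_modify(apic, 0x10 + pin * 2, reg);
- *
- * The read leaves the index register pointing at the RTE word, so only
- * chipsets with the SiS rmw bug need the index rewritten here.
- */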
-
-static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
-{
-       struct irq_pin_list *entry;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       entry = cfg->irq_2_pin;
-       for (;;) {
-               unsigned int reg;
-               int pin;
-
-               if (!entry)
-                       break;
-               pin = entry->pin;
-               reg = io_apic_read(entry->apic, 0x10 + pin*2);
-               /* Is the remote IRR bit set? */
-               if (reg & IO_APIC_REDIR_REMOTE_IRR) {
-                       spin_unlock_irqrestore(&ioapic_lock, flags);
-                       return true;
-               }
-               if (!entry->next)
-                       break;
-               entry = entry->next;
-       }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return false;
-}
-
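-/*
- * A routing-table entry is 64 bits wide, but the IO-APIC exposes it as two
- * 32-bit registers (0x10 + 2*pin and 0x11 + 2*pin); this union gives both
- * views of the same entry.
- */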
-union entry_union {
-       struct { u32 w1, w2; };
-       struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
-       union entry_union eu;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-       eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-       return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-       union entry_union eu;
-
-       eu.entry = e;
-       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
-       unsigned long flags;
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __ioapic_write_entry(apic, pin, e);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
-       unsigned long flags;
-       union entry_union eu = { .entry.mask = 1 };
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
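-
-/*
- * Between them, __ioapic_write_entry() and ioapic_mask_entry() order the
- * two word writes so that a partially written RTE can never be observed
- * unmasked.
- */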
-
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
-               cfg->move_cleanup_count = 0;
-               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-                       cfg->move_cleanup_count++;
-               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
-       cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-       int apic, pin;
-       struct irq_pin_list *entry;
-       u8 vector = cfg->vector;
-
-       entry = cfg->irq_2_pin;
-       for (;;) {
-               unsigned int reg;
-
-               if (!entry)
-                       break;
-
-               apic = entry->apic;
-               pin = entry->pin;
-#ifdef CONFIG_INTR_REMAP
-               /*
-                * With interrupt-remapping, destination information comes
-                * from interrupt-remapping table entry.
-                */
-               if (!irq_remapped(irq))
-                       io_apic_write(apic, 0x11 + pin*2, dest);
-#else
-               io_apic_write(apic, 0x11 + pin*2, dest);
-#endif
-               reg = io_apic_read(apic, 0x10 + pin*2);
-               reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-               reg |= vector;
-               io_apic_modify(apic, 0x10 + pin*2, reg);
-               if (!entry->next)
-                       break;
-               entry = entry->next;
-       }
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg;
-       unsigned int irq;
-
-       if (!cpumask_intersects(mask, cpu_online_mask))
-               return BAD_APICID;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return BAD_APICID;
-
-       cpumask_and(desc->affinity, cfg->domain, mask);
-       set_extra_move_desc(desc, mask);
-
-       return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg;
-       unsigned long flags;
-       unsigned int dest;
-       unsigned int irq;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       dest = set_desc_affinity(desc, mask);
-       if (dest != BAD_APICID) {
-               /* Only the high 8 bits are valid. */
-               dest = SET_APIC_LOGICAL_ID(dest);
-               __target_IO_APIC_irq(irq, dest, cfg);
-       }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc;
-
-       desc = irq_to_desc(irq);
-
-       set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
-{
-       struct irq_pin_list *entry;
-
-       entry = cfg->irq_2_pin;
-       if (!entry) {
-               entry = get_one_free_irq_2_pin(cpu);
-               if (!entry) {
-                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
-                                       apic, pin);
-                       return;
-               }
-               cfg->irq_2_pin = entry;
-               entry->apic = apic;
-               entry->pin = pin;
-               return;
-       }
-
-       while (entry->next) {
-               /* not again, please */
-               if (entry->apic == apic && entry->pin == pin)
-                       return;
-
-               entry = entry->next;
-       }
-
-       entry->next = get_one_free_irq_2_pin(cpu);
-       entry = entry->next;
-       if (!entry) {
-               printk(KERN_ERR "cannot alloc irq_2_pin to add %d - %d\n",
-                               apic, pin);
-               return;
-       }
-       entry->apic = apic;
-       entry->pin = pin;
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
-                                     int oldapic, int oldpin,
-                                     int newapic, int newpin)
-{
-       struct irq_pin_list *entry = cfg->irq_2_pin;
-       int replaced = 0;
-
-       while (entry) {
-               if (entry->apic == oldapic && entry->pin == oldpin) {
-                       entry->apic = newapic;
-                       entry->pin = newpin;
-                       replaced = 1;
-                       /* every one is different, right? */
-                       break;
-               }
-               entry = entry->next;
-       }
-
-       /* entry not found: fall back to adding the new pin */
-       if (!replaced)
-               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
-}
-
-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
-                               int mask_and, int mask_or,
-                               void (*final)(struct irq_pin_list *entry))
-{
-       int pin;
-       struct irq_pin_list *entry;
-
-       for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
-               unsigned int reg;
-               pin = entry->pin;
-               reg = io_apic_read(entry->apic, 0x10 + pin * 2);
-               reg &= mask_and;
-               reg |= mask_or;
-               io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
-               if (final)
-                       final(entry);
-       }
-}
-
-static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
-}
-
-#ifdef CONFIG_X86_64
-static void io_apic_sync(struct irq_pin_list *entry)
-{
-       /*
-        * Synchronize the IO-APIC and the CPU by doing
-        * a dummy read from the IO-APIC
-        */
-       struct io_apic __iomem *io_apic;
-       io_apic = io_apic_base(entry->apic);
-       readl(&io_apic->data);
-}
-
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
-}
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-                       IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
-                       IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-#endif /* CONFIG_X86_32 */
-
-static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
-{
-       struct irq_cfg *cfg = desc->chip_data;
-       unsigned long flags;
-
-       BUG_ON(!cfg);
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __mask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
-{
-       struct irq_cfg *cfg = desc->chip_data;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __unmask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void mask_IO_APIC_irq(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-
-       mask_IO_APIC_irq_desc(desc);
-}
-static void unmask_IO_APIC_irq(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-
-       unmask_IO_APIC_irq_desc(desc);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
-       struct IO_APIC_route_entry entry;
-
-       /* Check delivery_mode to be sure we're not clearing an SMI pin */
-       entry = ioapic_read_entry(apic, pin);
-       if (entry.delivery_mode == dest_SMI)
-               return;
-       /*
-        * Disable it in the IO-APIC irq-routing table:
-        */
-       ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC(void)
-{
-       int apic, pin;
-
-       for (apic = 0; apic < nr_ioapics; apic++)
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-                       clear_IO_APIC_pin(apic, pin);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries[MAX_PIRQS] = {
-       [0 ... MAX_PIRQS - 1] = -1
-};
-
-static int __init ioapic_pirq_setup(char *str)
-{
-       int i, max;
-       int ints[MAX_PIRQS+1];
-
-       get_options(str, ARRAY_SIZE(ints), ints);
-
-       apic_printk(APIC_VERBOSE, KERN_INFO
-                       "PIRQ redirection, working around broken MP-BIOS.\n");
-       max = MAX_PIRQS;
-       if (ints[0] < MAX_PIRQS)
-               max = ints[0];
-
-       for (i = 0; i < max; i++) {
-               apic_printk(APIC_VERBOSE, KERN_DEBUG
-                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
-               /*
-                * PIRQs are mapped upside down, usually.
-                */
-               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
-       }
-       return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_INTR_REMAP
-/* I/O APIC RTE contents at the OS boot up */
-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
-/*
- * Saves and masks all the unmasked IO-APIC RTE's
- */
-int save_mask_IO_APIC_setup(void)
-{
-       union IO_APIC_reg_01 reg_01;
-       unsigned long flags;
-       int apic, pin;
-
-       /*
-        * The number of IO-APIC IRQ registers (== #pins):
-        */
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(apic, 1);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-       }
-
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               early_ioapic_entries[apic] =
-                       kzalloc(sizeof(struct IO_APIC_route_entry) *
-                               nr_ioapic_registers[apic], GFP_KERNEL);
-               if (!early_ioapic_entries[apic])
-                       goto nomem;
-       }
-
-       for (apic = 0; apic < nr_ioapics; apic++)
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                       struct IO_APIC_route_entry entry;
-
-                       entry = early_ioapic_entries[apic][pin] =
-                               ioapic_read_entry(apic, pin);
-                       if (!entry.mask) {
-                               entry.mask = 1;
-                               ioapic_write_entry(apic, pin, entry);
-                       }
-               }
-
-       return 0;
-
-nomem:
-       while (apic >= 0)
-               kfree(early_ioapic_entries[apic--]);
-       memset(early_ioapic_entries, 0,
-               ARRAY_SIZE(early_ioapic_entries));
-
-       return -ENOMEM;
-}
-
-void restore_IO_APIC_setup(void)
-{
-       int apic, pin;
-
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               if (!early_ioapic_entries[apic])
-                       break;
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-                       ioapic_write_entry(apic, pin,
-                                          early_ioapic_entries[apic][pin]);
-               kfree(early_ioapic_entries[apic]);
-               early_ioapic_entries[apic] = NULL;
-       }
-}
-
-void reinit_intr_remapped_IO_APIC(int intr_remapping)
-{
-       /*
-        * For now do a plain restore of the previous settings.
-        * TBD: when the OS enables interrupt-remapping, the IO-APIC RTEs
-        * need to be set up to point to interrupt-remapping table entries;
-        * setup_IO_APIC_irqs() will do the proper initialization later.
-        */
-       restore_IO_APIC_setup();
-}
-#endif
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
-       int i;
-
-       for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].irqtype == type &&
-                   (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
-                    mp_irqs[i].dstapic == MP_APIC_ALL) &&
-                   mp_irqs[i].dstirq == pin)
-                       return i;
-
-       return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
-       int i;
-
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].srcbus;
-
-               if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].irqtype == type) &&
-                   (mp_irqs[i].srcbusirq == irq))
-
-                       return mp_irqs[i].dstirq;
-       }
-       return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
-       int i;
-
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].srcbus;
-
-               if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].irqtype == type) &&
-                   (mp_irqs[i].srcbusirq == irq))
-                       break;
-       }
-       if (i < mp_irq_entries) {
-               int apic;
-               for (apic = 0; apic < nr_ioapics; apic++) {
-                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
-                               return apic;
-               }
-       }
-
-       return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-       int apic, i, best_guess = -1;
-
-       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-               bus, slot, pin);
-       if (test_bit(bus, mp_bus_not_pci)) {
-               apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-               return -1;
-       }
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].srcbus;
-
-               for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-                           mp_irqs[i].dstapic == MP_APIC_ALL)
-                               break;
-
-               if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].irqtype &&
-                   (bus == lbus) &&
-                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-                       if (!(apic || IO_APIC_IRQ(irq)))
-                               continue;
-
-                       if (pin == (mp_irqs[i].srcbusirq & 3))
-                               return irq;
-                       /*
-                        * Use the first all-but-pin matching entry as a
-                        * best-guess fuzzy result for broken mptables.
-                        */
-                       if (best_guess < 0)
-                               best_guess = irq;
-               }
-       }
-       return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
-       if (irq < NR_IRQS_LEGACY) {
-               unsigned int port = 0x4d0 + (irq >> 3);
-               return (inb(port) >> (irq & 7)) & 1;
-       }
-       apic_printk(APIC_VERBOSE, KERN_INFO
-                       "Broken MPtable reports ISA irq %d\n", irq);
-       return 0;
-}
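-
-/*
- * Example: for IRQ 10 the ELCR bit lives at port 0x4d1, bit 2; a set bit
- * means the line is level triggered, a clear bit means edge triggered.
- */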
-
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx)       (0)
-#define default_ISA_polarity(idx)      (0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value.  If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].srcbusirq))
-#define default_EISA_polarity(idx)     default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx)       (1)
-#define default_PCI_polarity(idx)      (1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx)       (1)
-#define default_MCA_polarity(idx)      default_ISA_polarity(idx)
-
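-/*
- * mp_irqs[].irqflag encodes the polarity in bits 0-1 and the trigger mode
- * in bits 2-3; a value of 0 means "conforms to the bus specification" and
- * is resolved through the default_*_polarity()/default_*_trigger() macros
- * above.
- */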
-static int MPBIOS_polarity(int idx)
-{
-       int bus = mp_irqs[idx].srcbus;
-       int polarity;
-
-       /*
-        * Determine IRQ line polarity (high active or low active):
-        */
-       switch (mp_irqs[idx].irqflag & 3)
-       {
-               case 0: /* conforms, ie. bus-type dependent polarity */
-                       if (test_bit(bus, mp_bus_not_pci))
-                               polarity = default_ISA_polarity(idx);
-                       else
-                               polarity = default_PCI_polarity(idx);
-                       break;
-               case 1: /* high active */
-               {
-                       polarity = 0;
-                       break;
-               }
-               case 2: /* reserved */
-               {
-                       printk(KERN_WARNING "broken BIOS!!\n");
-                       polarity = 1;
-                       break;
-               }
-               case 3: /* low active */
-               {
-                       polarity = 1;
-                       break;
-               }
-               default: /* invalid */
-               {
-                       printk(KERN_WARNING "broken BIOS!!\n");
-                       polarity = 1;
-                       break;
-               }
-       }
-       return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
-       int bus = mp_irqs[idx].srcbus;
-       int trigger;
-
-       /*
-        * Determine IRQ trigger mode (edge or level sensitive):
-        */
-       switch ((mp_irqs[idx].irqflag>>2) & 3)
-       {
-               case 0: /* conforms, ie. bus-type dependent */
-                       if (test_bit(bus, mp_bus_not_pci))
-                               trigger = default_ISA_trigger(idx);
-                       else
-                               trigger = default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-                       switch (mp_bus_id_to_type[bus]) {
-                               case MP_BUS_ISA: /* ISA pin */
-                               {
-                                       /* set before the switch */
-                                       break;
-                               }
-                               case MP_BUS_EISA: /* EISA pin */
-                               {
-                                       trigger = default_EISA_trigger(idx);
-                                       break;
-                               }
-                               case MP_BUS_PCI: /* PCI pin */
-                               {
-                                       /* set before the switch */
-                                       break;
-                               }
-                               case MP_BUS_MCA: /* MCA pin */
-                               {
-                                       trigger = default_MCA_trigger(idx);
-                                       break;
-                               }
-                               default:
-                               {
-                                       printk(KERN_WARNING "broken BIOS!!\n");
-                                       trigger = 1;
-                                       break;
-                               }
-                       }
-#endif
-                       break;
-               case 1: /* edge */
-               {
-                       trigger = 0;
-                       break;
-               }
-               case 2: /* reserved */
-               {
-                       printk(KERN_WARNING "broken BIOS!!\n");
-                       trigger = 1;
-                       break;
-               }
-               case 3: /* level */
-               {
-                       trigger = 1;
-                       break;
-               }
-               default: /* invalid */
-               {
-                       printk(KERN_WARNING "broken BIOS!!\n");
-                       trigger = 0;
-                       break;
-               }
-       }
-       return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
-       return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
-       return MPBIOS_trigger(idx);
-}
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-static int pin_2_irq(int idx, int apic, int pin)
-{
-       int irq, i;
-       int bus = mp_irqs[idx].srcbus;
-
-       /*
-        * Debugging check, we are in big trouble if this message pops up!
-        */
-       if (mp_irqs[idx].dstirq != pin)
-               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
-       if (test_bit(bus, mp_bus_not_pci)) {
-               irq = mp_irqs[idx].srcbusirq;
-       } else {
-               /*
-                * PCI IRQs are mapped in order
-                */
-               i = irq = 0;
-               while (i < apic)
-                       irq += nr_ioapic_registers[i++];
-               irq += pin;
-               /*
-                * For MPS mode, so far only needed by ES7000 platform
-                */
-               if (ioapic_renumber_irq)
-                       irq = ioapic_renumber_irq(apic, irq);
-       }
-
-#ifdef CONFIG_X86_32
-       /*
-        * PCI IRQ command line redirection. Yes, limits are hardcoded.
-        */
-       if ((pin >= 16) && (pin <= 23)) {
-               if (pirq_entries[pin-16] != -1) {
-                       if (!pirq_entries[pin-16]) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG
-                                               "disabling PIRQ%d\n", pin-16);
-                       } else {
-                               irq = pirq_entries[pin-16];
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG
-                                               "using PIRQ%d -> IRQ %d\n",
-                                               pin-16, irq);
-                       }
-               }
-       }
-#endif
-
-       return irq;
-}
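-
-/*
- * Example (a sketch): with two IO-APICs of 24 pins each and no
- * ioapic_renumber_irq hook, pin 5 of the second IO-APIC maps to
- * irq = 24 + 5 = 29.
- */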
-
-void lock_vector_lock(void)
-{
-       /* Used so that the online set of cpus does not change
-        * during assign_irq_vector.
-        */
-       spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-       spin_unlock(&vector_lock);
-}
-
-static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-       /*
-        * NOTE! The local APIC isn't very good at handling
-        * multiple interrupts at the same interrupt level.
-        * As the interrupt level is determined by taking the
-        * vector number and shifting that right by 4, we
-        * want to spread these out a bit so that they don't
-        * all fall in the same interrupt level.
-        *
-        * Also, we've got to be careful not to trash gate
-        * 0x80, because int 0x80 is hm, kind of importantish. ;)
-        */
-       static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-       unsigned int old_vector;
-       int cpu, err;
-       cpumask_var_t tmp_mask;
-
-       if (cfg->move_in_progress || cfg->move_cleanup_count)
-               return -EBUSY;
-
-       if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
-               return -ENOMEM;
-
-       old_vector = cfg->vector;
-       if (old_vector) {
-               cpumask_and(tmp_mask, mask, cpu_online_mask);
-               cpumask_and(tmp_mask, cfg->domain, tmp_mask);
-               if (!cpumask_empty(tmp_mask)) {
-                       free_cpumask_var(tmp_mask);
-                       return 0;
-               }
-       }
-
-       /* Only try and allocate irqs on cpus that are present */
-       err = -ENOSPC;
-       for_each_cpu_and(cpu, mask, cpu_online_mask) {
-               int new_cpu;
-               int vector, offset;
-
-               apic->vector_allocation_domain(cpu, tmp_mask);
-
-               vector = current_vector;
-               offset = current_offset;
-next:
-               vector += 8;
-               if (vector >= first_system_vector) {
-                       /* If out of vectors on large boxen, must share them. */
-                       offset = (offset + 1) % 8;
-                       vector = FIRST_DEVICE_VECTOR + offset;
-               }
-               if (unlikely(current_vector == vector))
-                       continue;
-
-               if (test_bit(vector, used_vectors))
-                       goto next;
-
-               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
-                       if (per_cpu(vector_irq, new_cpu)[vector] != -1)
-                               goto next;
-               /* Found one! */
-               current_vector = vector;
-               current_offset = offset;
-               if (old_vector) {
-                       cfg->move_in_progress = 1;
-                       cpumask_copy(cfg->old_domain, cfg->domain);
-               }
-               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
-                       per_cpu(vector_irq, new_cpu)[vector] = irq;
-               cfg->vector = vector;
-               cpumask_copy(cfg->domain, tmp_mask);
-               err = 0;
-               break;
-       }
-       free_cpumask_var(tmp_mask);
-       return err;
-}
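-
-/*
- * Note: vectors are handed out in steps of 8 so that successive
- * allocations are spread across interrupt priority levels (a level is
- * vector >> 4); once the scan reaches first_system_vector it wraps back
- * to FIRST_DEVICE_VECTOR with a new offset, so skipped vectors get reused.
- */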
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-       int err;
-       unsigned long flags;
-
-       spin_lock_irqsave(&vector_lock, flags);
-       err = __assign_irq_vector(irq, cfg, mask);
-       spin_unlock_irqrestore(&vector_lock, flags);
-       return err;
-}
-
-static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
-{
-       int cpu, vector;
-
-       BUG_ON(!cfg->vector);
-
-       vector = cfg->vector;
-       for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
-               per_cpu(vector_irq, cpu)[vector] = -1;
-
-       cfg->vector = 0;
-       cpumask_clear(cfg->domain);
-
-       if (likely(!cfg->move_in_progress))
-               return;
-       for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
-               for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
-                                                               vector++) {
-                       if (per_cpu(vector_irq, cpu)[vector] != irq)
-                               continue;
-                       per_cpu(vector_irq, cpu)[vector] = -1;
-                       break;
-               }
-       }
-       cfg->move_in_progress = 0;
-}
-
-void __setup_vector_irq(int cpu)
-{
-       /* Initialize vector_irq on a new cpu */
-       /* This function must be called with vector_lock held */
-       int irq, vector;
-       struct irq_cfg *cfg;
-       struct irq_desc *desc;
-
-       /* Mark the inuse vectors */
-       for_each_irq_desc(irq, desc) {
-               cfg = desc->chip_data;
-               if (!cpumask_test_cpu(cpu, cfg->domain))
-                       continue;
-               vector = cfg->vector;
-               per_cpu(vector_irq, cpu)[vector] = irq;
-       }
-       /* Mark the free vectors */
-       for (vector = 0; vector < NR_VECTORS; ++vector) {
-               irq = per_cpu(vector_irq, cpu)[vector];
-               if (irq < 0)
-                       continue;
-
-               cfg = irq_cfg(irq);
-               if (!cpumask_test_cpu(cpu, cfg->domain))
-                       per_cpu(vector_irq, cpu)[vector] = -1;
-       }
-}
-
-static struct irq_chip ioapic_chip;
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip;
-#endif
-
-#define IOAPIC_AUTO     -1
-#define IOAPIC_EDGE     0
-#define IOAPIC_LEVEL    1
-
-#ifdef CONFIG_X86_32
-static inline int IO_APIC_irq_trigger(int irq)
-{
-       int apic, idx, pin;
-
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                       idx = find_irq_entry(apic, pin, mp_INT);
-                       if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                               return irq_trigger(idx);
-               }
-       }
-       /*
-        * nonexistent IRQs are edge default
-        */
-       return 0;
-}
-#else
-static inline int IO_APIC_irq_trigger(int irq)
-{
-       return 1;
-}
-#endif
-
-static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
-{
-
-       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-           trigger == IOAPIC_LEVEL)
-               desc->status |= IRQ_LEVEL;
-       else
-               desc->status &= ~IRQ_LEVEL;
-
-#ifdef CONFIG_INTR_REMAP
-       if (irq_remapped(irq)) {
-               desc->status |= IRQ_MOVE_PCNTXT;
-               if (trigger)
-                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-                                                     handle_fasteoi_irq,
-                                                    "fasteoi");
-               else
-                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
-                                                     handle_edge_irq, "edge");
-               return;
-       }
-#endif
-       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-           trigger == IOAPIC_LEVEL)
-               set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                             handle_fasteoi_irq,
-                                             "fasteoi");
-       else
-               set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                             handle_edge_irq, "edge");
-}
-
-int setup_ioapic_entry(int apic_id, int irq,
-                      struct IO_APIC_route_entry *entry,
-                      unsigned int destination, int trigger,
-                      int polarity, int vector)
-{
-       /*
-        * add it to the IO-APIC irq-routing table:
-        */
-       memset(entry, 0, sizeof(*entry));
-
-#ifdef CONFIG_INTR_REMAP
-       if (intr_remapping_enabled) {
-               struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
-               struct irte irte;
-               struct IR_IO_APIC_route_entry *ir_entry =
-                       (struct IR_IO_APIC_route_entry *) entry;
-               int index;
-
-               if (!iommu)
-                       panic("No mapping iommu for ioapic %d\n", apic_id);
-
-               index = alloc_irte(iommu, irq, 1);
-               if (index < 0)
-                       panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
-
-               memset(&irte, 0, sizeof(irte));
-
-               irte.present = 1;
-               irte.dst_mode = apic->irq_dest_mode;
-               irte.trigger_mode = trigger;
-               irte.dlvry_mode = apic->irq_delivery_mode;
-               irte.vector = vector;
-               irte.dest_id = IRTE_DEST(destination);
-
-               modify_irte(irq, &irte);
-
-               ir_entry->index2 = (index >> 15) & 0x1;
-               ir_entry->zero = 0;
-               ir_entry->format = 1;
-               ir_entry->index = (index & 0x7fff);
-       } else
-#endif
-       {
-               entry->delivery_mode = apic->irq_delivery_mode;
-               entry->dest_mode = apic->irq_dest_mode;
-               entry->dest = destination;
-       }
-
-       entry->mask = 0;                                /* enable IRQ */
-       entry->trigger = trigger;
-       entry->polarity = polarity;
-       entry->vector = vector;
-
-       /* Mask level triggered irqs.
-        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-        */
-       if (trigger)
-               entry->mask = 1;
-       return 0;
-}
-
-static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
-                             int trigger, int polarity)
-{
-       struct irq_cfg *cfg;
-       struct IO_APIC_route_entry entry;
-       unsigned int dest;
-
-       if (!IO_APIC_IRQ(irq))
-               return;
-
-       cfg = desc->chip_data;
-
-       if (assign_irq_vector(irq, cfg, apic->target_cpus()))
-               return;
-
-       dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
-
-       apic_printk(APIC_VERBOSE, KERN_DEBUG
-                   "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
-                   "IRQ %d Mode:%i Active:%i)\n",
-                   apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
-                   irq, trigger, polarity);
-
-       if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
-                              dest, trigger, polarity, cfg->vector)) {
-               printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-                      mp_ioapics[apic_id].apicid, pin);
-               __clear_irq_vector(irq, cfg);
-               return;
-       }
-
-       ioapic_register_intr(irq, desc, trigger);
-       if (irq < NR_IRQS_LEGACY)
-               disable_8259A_irq(irq);
-
-       ioapic_write_entry(apic_id, pin, entry);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
-       int apic_id, pin, idx, irq;
-       int notcon = 0;
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
-
-       apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
-       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-               for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
-                       idx = find_irq_entry(apic_id, pin, mp_INT);
-                       if (idx == -1) {
-                               if (!notcon) {
-                                       notcon = 1;
-                                       apic_printk(APIC_VERBOSE,
-                                               KERN_DEBUG " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               } else
-                                       apic_printk(APIC_VERBOSE, " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               continue;
-                       }
-                       if (notcon) {
-                               apic_printk(APIC_VERBOSE,
-                                       " (apicid-pin) not connected\n");
-                               notcon = 0;
-                       }
-
-                       irq = pin_2_irq(idx, apic_id, pin);
-
-                       /*
-                        * Skip the timer IRQ if there's a quirk handler
-                        * installed and if it returns 1:
-                        */
-                       if (apic->multi_timer_check &&
-                                       apic->multi_timer_check(apic_id, irq))
-                               continue;
-
-                       desc = irq_to_desc_alloc_cpu(irq, cpu);
-                       if (!desc) {
-                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-                               continue;
-                       }
-                       cfg = desc->chip_data;
-                       add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
-
-                       setup_IO_APIC_irq(apic_id, pin, irq, desc,
-                                       irq_trigger(idx), irq_polarity(idx));
-               }
-       }
-
-       if (notcon)
-               apic_printk(APIC_VERBOSE,
-                       " (apicid-pin) not connected\n");
-}
-
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
-                                       int vector)
-{
-       struct IO_APIC_route_entry entry;
-
-#ifdef CONFIG_INTR_REMAP
-       if (intr_remapping_enabled)
-               return;
-#endif
-
-       memset(&entry, 0, sizeof(entry));
-
-       /*
-        * We use logical delivery to get the timer IRQ
-        * to the first CPU.
-        */
-       entry.dest_mode = apic->irq_dest_mode;
-       entry.mask = 0;                 /* don't mask IRQ for edge */
-       entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
-       entry.delivery_mode = apic->irq_delivery_mode;
-       entry.polarity = 0;
-       entry.trigger = 0;
-       entry.vector = vector;
-
-       /*
-        * The timer IRQ doesn't have to know that behind the
-        * scenes we may have an 8259A-master in AEOI mode ...
-        */
-       set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
-       /*
-        * Add it to the IO-APIC irq-routing table:
-        */
-       ioapic_write_entry(apic_id, pin, entry);
-}
-
-__apicdebuginit(void) print_IO_APIC(void)
-{
-       int apic, i;
-       union IO_APIC_reg_00 reg_00;
-       union IO_APIC_reg_01 reg_01;
-       union IO_APIC_reg_02 reg_02;
-       union IO_APIC_reg_03 reg_03;
-       unsigned long flags;
-       struct irq_cfg *cfg;
-       struct irq_desc *desc;
-       unsigned int irq;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-       for (i = 0; i < nr_ioapics; i++)
-               printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-                      mp_ioapics[i].apicid, nr_ioapic_registers[i]);
-
-       /*
-        * We are a bit conservative about what we expect.  We have to
-        * know about every hardware change ASAP.
-        */
-       printk(KERN_INFO "testing the IO APIC.......................\n");
-
-       for (apic = 0; apic < nr_ioapics; apic++) {
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_00.raw = io_apic_read(apic, 0);
-       reg_01.raw = io_apic_read(apic, 1);
-       if (reg_01.bits.version >= 0x10)
-               reg_02.raw = io_apic_read(apic, 2);
-       if (reg_01.bits.version >= 0x20)
-               reg_03.raw = io_apic_read(apic, 3);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       printk("\n");
-       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
-       printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
-       printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
-       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-
-       printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
-       printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-
-       printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
-       printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-
-       /*
-        * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
-        * but the value of reg_02 is read as the previous read register
-        * value, so ignore it if reg_02 == reg_01.
-        */
-       if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
-               printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
-               printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-       }
-
-       /*
-        * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
-        * or reg_03, but the value of reg_0[23] is read as the previous read
-        * register value, so ignore it if reg_03 == reg_0[12].
-        */
-       if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
-           reg_03.raw != reg_01.raw) {
-               printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
-               printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-       }
-
-       printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
-       printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
-                         " Stat Dmod Deli Vect:   \n");
-
-       for (i = 0; i <= reg_01.bits.entries; i++) {
-               struct IO_APIC_route_entry entry;
-
-               entry = ioapic_read_entry(apic, i);
-
-               printk(KERN_DEBUG " %02x %03X ",
-                       i,
-                       entry.dest
-               );
-
-               printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
-                       entry.mask,
-                       entry.trigger,
-                       entry.irr,
-                       entry.polarity,
-                       entry.delivery_status,
-                       entry.dest_mode,
-                       entry.delivery_mode,
-                       entry.vector
-               );
-       }
-       }
-       printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for_each_irq_desc(irq, desc) {
-               struct irq_pin_list *entry;
-
-               cfg = desc->chip_data;
-               entry = cfg->irq_2_pin;
-               if (!entry)
-                       continue;
-               printk(KERN_DEBUG "IRQ%d ", irq);
-               for (;;) {
-                       printk("-> %d:%d", entry->apic, entry->pin);
-                       if (!entry->next)
-                               break;
-                       entry = entry->next;
-               }
-               printk("\n");
-       }
-
-       printk(KERN_INFO ".................................... done.\n");
-
-       return;
-}
-
-__apicdebuginit(void) print_APIC_bitfield(int base)
-{
-       unsigned int v;
-       int i, j;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-       for (i = 0; i < 8; i++) {
-               v = apic_read(base + i*0x10);
-               for (j = 0; j < 32; j++) {
-                       if (v & (1<<j))
-                               printk("1");
-                       else
-                               printk("0");
-               }
-               printk("\n");
-       }
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-       unsigned int v, ver, maxlvt;
-       u64 icr;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-               smp_processor_id(), hard_smp_processor_id());
-       v = apic_read(APIC_ID);
-       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-       v = apic_read(APIC_LVR);
-       printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-       ver = GET_APIC_VERSION(v);
-       maxlvt = lapic_get_maxlvt();
-
-       v = apic_read(APIC_TASKPRI);
-       printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-               if (!APIC_XAPIC(ver)) {
-                       v = apic_read(APIC_ARBPRI);
-                       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-                              v & APIC_ARBPRI_MASK);
-               }
-               v = apic_read(APIC_PROCPRI);
-               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-       }
-
-       /*
-        * The remote read register is supported only by the 82489DX
-        * and the local APIC of Pentium processors.
-        */
-       if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
-               v = apic_read(APIC_RRR);
-               printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-       }
-
-       v = apic_read(APIC_LDR);
-       printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-       if (!x2apic_enabled()) {
-               v = apic_read(APIC_DFR);
-               printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-       }
-       v = apic_read(APIC_SPIV);
-       printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-       printk(KERN_DEBUG "... APIC ISR field:\n");
-       print_APIC_bitfield(APIC_ISR);
-       printk(KERN_DEBUG "... APIC TMR field:\n");
-       print_APIC_bitfield(APIC_TMR);
-       printk(KERN_DEBUG "... APIC IRR field:\n");
-       print_APIC_bitfield(APIC_IRR);
-
-       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-
-               v = apic_read(APIC_ESR);
-               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-       }
-
-       icr = apic_icr_read();
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-       v = apic_read(APIC_LVTT);
-       printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-       if (maxlvt > 3) {                       /* PC is LVT#4. */
-               v = apic_read(APIC_LVTPC);
-               printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-       }
-       v = apic_read(APIC_LVT0);
-       printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-       v = apic_read(APIC_LVT1);
-       printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-       if (maxlvt > 2) {                       /* ERR is LVT#3. */
-               v = apic_read(APIC_LVTERR);
-               printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-       }
-
-       v = apic_read(APIC_TMICT);
-       printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-       v = apic_read(APIC_TMCCT);
-       printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-       v = apic_read(APIC_TDCR);
-       printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-       printk("\n");
-}
-
-__apicdebuginit(void) print_all_local_APICs(void)
-{
-       int cpu;
-
-       preempt_disable();
-       for_each_online_cpu(cpu)
-               smp_call_function_single(cpu, print_local_APIC, NULL, 1);
-       preempt_enable();
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-       unsigned int v;
-       unsigned long flags;
-
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-       spin_lock_irqsave(&i8259A_lock, flags);
-
-       v = inb(0xa1) << 8 | inb(0x21);
-       printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-       v = inb(0xa0) << 8 | inb(0x20);
-       printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
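-       /*
-        * OCW3 0x0b selects the ISR for the next read; restore the
-        * default IRR selection (OCW3 0x0a) when done.
-        */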
-       outb(0x0b,0xa0);
-       outb(0x0b,0x20);
-       v = inb(0xa0) << 8 | inb(0x20);
-       outb(0x0a,0xa0);
-       outb(0x0a,0x20);
-
-       spin_unlock_irqrestore(&i8259A_lock, flags);
-
-       printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
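-       /*
-        * The ELCR (ports 0x4d0/0x4d1) holds one bit per ISA IRQ:
-        * 1 = level triggered, 0 = edge triggered.
-        */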
-       v = inb(0x4d1) << 8 | inb(0x4d0);
-       printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-__apicdebuginit(int) print_all_ICs(void)
-{
-       print_PIC();
-       print_all_local_APICs();
-       print_IO_APIC();
-
-       return 0;
-}
-
-fs_initcall(print_all_ICs);
-
-
-/* Where, if anywhere, is the i8259 connected in ExtINT mode? */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-void __init enable_IO_APIC(void)
-{
-       union IO_APIC_reg_01 reg_01;
-       int i8259_apic, i8259_pin;
-       int apic;
-       unsigned long flags;
-
-       /*
-        * The number of IO-APIC IRQ registers (== #pins):
-        */
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(apic, 1);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
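-               /*
-                * Register 01's 'entries' field holds the highest
-                * redirection-entry index, so the pin count is
-                * entries + 1 (e.g. 23 means a 24-pin IO-APIC).
-                */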
-               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-       }
-       for(apic = 0; apic < nr_ioapics; apic++) {
-               int pin;
-               /* See if any of the pins is in ExtINT mode */
-               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                       struct IO_APIC_route_entry entry;
-                       entry = ioapic_read_entry(apic, pin);
-
-                       /* If the interrupt line is enabled and in ExtINT mode,
-                        * we have found the pin where the i8259 is connected.
-                        */
-                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
-                               ioapic_i8259.apic = apic;
-                               ioapic_i8259.pin  = pin;
-                               goto found_i8259;
-                       }
-               }
-       }
- found_i8259:
-       /* Look to see if the MP table has reported the ExtINT. */
-       /* If we could not find the appropriate pin by looking at the ioapic,
-        * the i8259 is probably not connected to the ioapic, but give the
-        * mptable a chance anyway.
-        */
-       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
-       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
-       /* Trust the MP table if nothing is set up in the hardware */
-       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
-               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
-               ioapic_i8259.pin  = i8259_pin;
-               ioapic_i8259.apic = i8259_apic;
-       }
-       /* Complain if the MP table and the hardware disagree */
-       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
-               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
-       {
-               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
-       }
-
-       /*
-        * Do not trust the IO-APIC being empty at bootup
-        */
-       clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
-       /*
-        * Clear the IO-APIC before rebooting:
-        */
-       clear_IO_APIC();
-
-       /*
-        * If the i8259 is routed through an IOAPIC,
-        * put that IOAPIC in virtual wire mode
-        * so that legacy interrupts can be delivered.
-        */
-       if (ioapic_i8259.pin != -1) {
-               struct IO_APIC_route_entry entry;
-
-               memset(&entry, 0, sizeof(entry));
-               entry.mask            = 0; /* Enabled */
-               entry.trigger         = 0; /* Edge */
-               entry.irr             = 0;
-               entry.polarity        = 0; /* High */
-               entry.delivery_status = 0;
-               entry.dest_mode       = 0; /* Physical */
-               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
-               entry.vector          = 0;
-               entry.dest            = read_apic_id();
-
-               /*
-                * Add it to the IO-APIC irq-routing table:
-                */
-               ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
-       }
-
-       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * Function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc(void)
-{
-       union IO_APIC_reg_00 reg_00;
-       physid_mask_t phys_id_present_map;
-       int apic_id;
-       int i;
-       unsigned char old_id;
-       unsigned long flags;
-
-       if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
-               return;
-
-       /*
-        * Don't check I/O APIC IDs for xAPIC systems.  They have
-        * no meaning without the serial APIC bus.
-        */
-       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-               return;
-       /*
-        * This is broken; anything with a real cpu count has to
-        * circumvent this idiocy regardless.
-        */
-       phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
-
-       /*
-        * Set the IOAPIC ID to the value stored in the MPC table.
-        */
-       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-
-               /* Read the register 0 value */
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_00.raw = io_apic_read(apic_id, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-
-               old_id = mp_ioapics[apic_id].apicid;
-
-               if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
-                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-                               apic_id, mp_ioapics[apic_id].apicid);
-                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-                               reg_00.bits.ID);
-                       mp_ioapics[apic_id].apicid = reg_00.bits.ID;
-               }
-
-               /*
-                * Sanity check, is the ID really free? Every APIC in a
-                * system must have a unique ID or we get lots of nice
-                * 'stuck on smp_invalidate_needed IPI wait' messages.
-                */
-               if (apic->check_apicid_used(phys_id_present_map,
-                                       mp_ioapics[apic_id].apicid)) {
-                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-                               apic_id, mp_ioapics[apic_id].apicid);
-                       for (i = 0; i < get_physical_broadcast(); i++)
-                               if (!physid_isset(i, phys_id_present_map))
-                                       break;
-                       if (i >= get_physical_broadcast())
-                               panic("Max APIC ID exceeded!\n");
-                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-                               i);
-                       physid_set(i, phys_id_present_map);
-                       mp_ioapics[apic_id].apicid = i;
-               } else {
-                       physid_mask_t tmp;
-                       tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
-                       apic_printk(APIC_VERBOSE, "Setting %d in the "
-                                       "phys_id_present_map\n",
-                                       mp_ioapics[apic_id].apicid);
-                       physids_or(phys_id_present_map, phys_id_present_map, tmp);
-               }
-
-
-               /*
-                * We need to adjust the IRQ routing table
-                * if the ID changed.
-                */
-               if (old_id != mp_ioapics[apic_id].apicid)
-                       for (i = 0; i < mp_irq_entries; i++)
-                               if (mp_irqs[i].dstapic == old_id)
-                                       mp_irqs[i].dstapic
-                                               = mp_ioapics[apic_id].apicid;
-
-               /*
-                * Read the right value from the MPC table and
-                * write it into the ID register.
-                */
-               apic_printk(APIC_VERBOSE, KERN_INFO
-                       "...changing IO-APIC physical APIC ID to %d ...",
-                       mp_ioapics[apic_id].apicid);
-
-               reg_00.bits.ID = mp_ioapics[apic_id].apicid;
-               spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(apic_id, 0, reg_00.raw);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-
-               /*
-                * Sanity check
-                */
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_00.raw = io_apic_read(apic_id, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-               if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
-                       printk("could not set ID!\n");
-               else
-                       apic_printk(APIC_VERBOSE, " ok.\n");
-       }
-}
-#endif
-
-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
-       no_timer_check = 1;
-       return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- * There is a nasty bug in some older SMP boards: their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- *     - timer IRQ defaults to IO-APIC IRQ
- *     - if this function detects that timer IRQs are defunct, then we fall
- *       back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
-       unsigned long t1 = jiffies;
-       unsigned long flags;
-
-       if (no_timer_check)
-               return 1;
-
-       local_save_flags(flags);
-       local_irq_enable();
-       /* Let ten ticks pass... */
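-       /*
-        * (10 * 1000) / HZ is ten ticks expressed in milliseconds:
-        * 10ms at HZ=1000, 100ms at HZ=100.
-        */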
-       mdelay((10 * 1000) / HZ);
-       local_irq_restore(flags);
-
-       /*
-        * Expect a few ticks at least, to be sure some possible
-        * glue logic does not lock up after one or two first
-        * ticks in a non-ExtINT mode.  Also the local APIC
-        * might have cached one ExtINT interrupt.  Finally, at
-        * least one tick may be lost due to delays.
-        */
-
-       /* time_after() is safe against jiffies wraparound */
-       if (time_after(jiffies, t1 + 4))
-               return 1;
-       return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that an unspecified number
- * of pending IRQ events is left unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs to the same CPU. It's much
- * better to do it this way, since then we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge-triggered interrupts need any delayed interrupt to be
- * resent, but this is now handled in the device-independent
- * code.
- */
-
-/*
- * Starting up an edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * to return 1 to indicate that it was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
-       int was_pending = 0;
-       unsigned long flags;
-       struct irq_cfg *cfg;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < NR_IRQS_LEGACY) {
-               disable_8259A_irq(irq);
-               if (i8259A_irq_pending(irq))
-                       was_pending = 1;
-       }
-       cfg = irq_cfg(irq);
-       __unmask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return was_pending;
-}
-
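-/*
- * Retriggering works by re-sending the irq's vector as an IPI: on
- * 64-bit to the first CPU in the irq's vector domain, on 32-bit
- * simply to the local CPU.
- */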
-#ifdef CONFIG_X86_64
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-
-       struct irq_cfg *cfg = irq_cfg(irq);
-       unsigned long flags;
-
-       spin_lock_irqsave(&vector_lock, flags);
-       apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
-       spin_unlock_irqrestore(&vector_lock, flags);
-
-       return 1;
-}
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-       apic->send_IPI_self(irq_cfg(irq)->vector);
-
-       return 1;
-}
-#endif
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_INTR_REMAP
-static void ir_irq_migration(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
-
-/*
- * Migrate the IO-APIC irq in the presence of intr-remapping.
- *
- * For edge triggered, irq migration is a simple atomic update (of vector
- * and cpu destination) of the IRTE and a flush of the hardware cache.
- *
- * For level triggered, we need to modify the io-apic RTE as well with the
- * updated vector information, along with modifying the IRTE with vector and
- * destination. So irq migration for level triggered is a little more
- * complex compared to edge triggered migration. But the good news is that
- * we use the same algorithm for level triggered migration as we have today,
- * the only difference being that we now initiate the irq migration from
- * process context instead of interrupt context.
- *
- * In future, when we do a directed EOI (combined with cpu EOI broadcast
- * suppression) to the IO-APIC, level triggered irq migration will also be
- * as simple as edge triggered migration and we can do the irq migration
- * with a simple atomic update to IO-APIC RTE.
- */
-static void
-migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg;
-       struct irte irte;
-       int modify_ioapic_rte;
-       unsigned int dest;
-       unsigned long flags;
-       unsigned int irq;
-
-       if (!cpumask_intersects(mask, cpu_online_mask))
-               return;
-
-       irq = desc->irq;
-       if (get_irte(irq, &irte))
-               return;
-
-       cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return;
-
-       set_extra_move_desc(desc, mask);
-
-       dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
-
-       modify_ioapic_rte = desc->status & IRQ_LEVEL;
-       if (modify_ioapic_rte) {
-               spin_lock_irqsave(&ioapic_lock, flags);
-               __target_IO_APIC_irq(irq, dest, cfg);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-       }
-
-       irte.vector = cfg->vector;
-       irte.dest_id = IRTE_DEST(dest);
-
-       /*
-        * Modify the IRTE and flush the interrupt entry cache.
-        */
-       modify_irte(irq, &irte);
-
-       if (cfg->move_in_progress)
-               send_cleanup_vector(cfg);
-
-       cpumask_copy(desc->affinity, mask);
-}
-
-static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
-{
-       int ret = -1;
-       struct irq_cfg *cfg = desc->chip_data;
-
-       mask_IO_APIC_irq_desc(desc);
-
-       if (io_apic_level_ack_pending(cfg)) {
-               /*
-                * Interrupt in progress. Migrating irq now will change the
-                * vector information in the IO-APIC RTE and that will confuse
-                * the EOI broadcast performed by the cpu.
-                * So, delay the irq migration to the next attempt.
-                */
-               schedule_delayed_work(&ir_migration_work, 1);
-               goto unmask;
-       }
-
-       /* everything is clear, we have right of way */
-       migrate_ioapic_irq_desc(desc, desc->pending_mask);
-
-       ret = 0;
-       desc->status &= ~IRQ_MOVE_PENDING;
-       cpumask_clear(desc->pending_mask);
-
-unmask:
-       unmask_IO_APIC_irq_desc(desc);
-
-       return ret;
-}
-
-static void ir_irq_migration(struct work_struct *work)
-{
-       unsigned int irq;
-       struct irq_desc *desc;
-
-       for_each_irq_desc(irq, desc) {
-               if (desc->status & IRQ_MOVE_PENDING) {
-                       unsigned long flags;
-
-                       spin_lock_irqsave(&desc->lock, flags);
-                       if (!desc->chip->set_affinity ||
-                           !(desc->status & IRQ_MOVE_PENDING)) {
-                               desc->status &= ~IRQ_MOVE_PENDING;
-                               spin_unlock_irqrestore(&desc->lock, flags);
-                               continue;
-                       }
-
-                       desc->chip->set_affinity(irq, desc->pending_mask);
-                       spin_unlock_irqrestore(&desc->lock, flags);
-               }
-       }
-}
-
-/*
- * Migrates the IRQ destination in the process context.
- */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
-                                           const struct cpumask *mask)
-{
-       if (desc->status & IRQ_LEVEL) {
-               desc->status |= IRQ_MOVE_PENDING;
-               cpumask_copy(desc->pending_mask, mask);
-               migrate_irq_remapped_level_desc(desc);
-               return;
-       }
-
-       migrate_ioapic_irq_desc(desc, mask);
-}
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
-                                      const struct cpumask *mask)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-
-       set_ir_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif
-
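-/*
- * IRQ-move cleanup IPI handler: scan this CPU's vector_irq[] table
- * and release any vector whose irq has already completed its move
- * to a different CPU/vector pair.
- */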
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
-{
-       unsigned vector, me;
-
-       ack_APIC_irq();
-       exit_idle();
-       irq_enter();
-
-       me = smp_processor_id();
-       for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-               unsigned int irq;
-               struct irq_desc *desc;
-               struct irq_cfg *cfg;
-               irq = __get_cpu_var(vector_irq)[vector];
-
-               if (irq == -1)
-                       continue;
-
-               desc = irq_to_desc(irq);
-               if (!desc)
-                       continue;
-
-               cfg = irq_cfg(irq);
-               spin_lock(&desc->lock);
-               if (!cfg->move_cleanup_count)
-                       goto unlock;
-
-               if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-                       goto unlock;
-
-               __get_cpu_var(vector_irq)[vector] = -1;
-               cfg->move_cleanup_count--;
-unlock:
-               spin_unlock(&desc->lock);
-       }
-
-       irq_exit();
-}
-
-static void irq_complete_move(struct irq_desc **descp)
-{
-       struct irq_desc *desc = *descp;
-       struct irq_cfg *cfg = desc->chip_data;
-       unsigned vector, me;
-
-       if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               if (likely(!cfg->move_desc_pending))
-                       return;
-
-               /* domain has not changed, but affinity did */
-               me = smp_processor_id();
-               if (cpumask_test_cpu(me, desc->affinity)) {
-                       *descp = desc = move_irq_desc(desc, me);
-                       /* get the new one */
-                       cfg = desc->chip_data;
-                       cfg->move_desc_pending = 0;
-               }
-#endif
-               return;
-       }
-
-       vector = ~get_irq_regs()->orig_ax;
-       me = smp_processor_id();
-
-       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               *descp = desc = move_irq_desc(desc, me);
-               /* get the new one */
-               cfg = desc->chip_data;
-#endif
-               send_cleanup_vector(cfg);
-       }
-}
-#else
-static inline void irq_complete_move(struct irq_desc **descp) {}
-#endif
-
-#ifdef CONFIG_INTR_REMAP
-static void ack_x2apic_level(unsigned int irq)
-{
-       ack_x2APIC_irq();
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-       ack_x2APIC_irq();
-}
-
-#endif
-
-static void ack_apic_edge(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-
-       irq_complete_move(&desc);
-       move_native_irq(irq);
-       ack_APIC_irq();
-}
-
-atomic_t irq_mis_count;
-
-static void ack_apic_level(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-
-#ifdef CONFIG_X86_32
-       unsigned long v;
-       int i;
-#endif
-       struct irq_cfg *cfg;
-       int do_unmask_irq = 0;
-
-       irq_complete_move(&desc);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-       /* If we are moving the irq we need to mask it */
-       if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
-               do_unmask_irq = 1;
-               mask_IO_APIC_irq_desc(desc);
-       }
-#endif
-
-#ifdef CONFIG_X86_32
-       /*
-       * It appears there is an erratum which affects at least version 0x11
-       * of I/O APIC (that's the 82093AA and cores integrated into various
-       * chipsets).  Under certain conditions a level-triggered interrupt is
-       * erroneously delivered as an edge-triggered one, but the respective IRR
-       * bit gets set nevertheless.  As a result the I/O unit expects an EOI
-       * message but it will never arrive and further interrupts are blocked
-       * from the source.  The exact reason is so far unknown, but the
-       * phenomenon was observed when two consecutive interrupt requests
-       * from a given source get delivered to the same CPU and the source is
-       * temporarily disabled in between.
-       *
-       * A workaround is to simulate an EOI message manually.  We achieve it
-       * by setting the trigger mode to edge and then to level when the edge
-       * trigger mode gets detected in the TMR of a local APIC for a
-       * level-triggered interrupt.  We mask the source for the time of the
-       * operation to prevent an edge-triggered interrupt escaping meanwhile.
-       * The idea is from Manfred Spraul.  --macro
-       */
-       cfg = desc->chip_data;
-       i = cfg->vector;
-
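-       /*
-        * The TMR is eight 32-bit registers spaced 0x10 apart: vector
-        * i lives in register i/32 at bit i%32, and the offset below,
-        * (i & ~0x1f) >> 1, equals (i / 32) * 0x10 - e.g. vector 0x61
-        * maps to offset 0x30, bit 1.
-        */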
-       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-#endif
-
-       /*
-        * We must acknowledge the irq before we move it or the acknowledge will
-        * not propagate properly.
-        */
-       ack_APIC_irq();
-
-       /* Now we can move and re-enable the irq */
-       if (unlikely(do_unmask_irq)) {
-               /* Only migrate the irq if the ack has been received.
-                *
-                * On rare occasions the broadcast level triggered ack gets
-                * delayed going to ioapics, and if we reprogram the
-                * vector while Remote IRR is still set the irq will never
-                * fire again.
-                *
-                * To prevent this scenario we read the Remote IRR bit
-                * of the ioapic.  This has two effects.
-                * - On any sane system the read of the ioapic will
-                *   flush writes (and acks) going to the ioapic from
-                *   this cpu.
-                * - We get to see if the ACK has actually been delivered.
-                *
-                * Based on failed experiments of reprogramming the
-                * ioapic entry from outside of irq context (starting
-                * with masking the ioapic entry and then polling until
-                * Remote IRR was clear before reprogramming the
-                * ioapic), I don't trust the Remote IRR bit to be
-                * completely accurate.
-                *
-                * However there appears to be no other way to plug
-                * this race, so if the Remote IRR bit is not
-                * accurate and is causing problems then it is a hardware bug
-                * and you can go talk to the chipset vendor about it.
-                */
-               cfg = desc->chip_data;
-               if (!io_apic_level_ack_pending(cfg))
-                       move_masked_irq(irq);
-               unmask_IO_APIC_irq_desc(desc);
-       }
-
-#ifdef CONFIG_X86_32
-       if (!(v & (1 << (i & 0x1f)))) {
-               atomic_inc(&irq_mis_count);
-               spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(cfg);
-               __unmask_and_level_IO_APIC_irq(cfg);
-               spin_unlock(&ioapic_lock);
-       }
-#endif
-}
-
-static struct irq_chip ioapic_chip __read_mostly = {
-       .name           = "IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_apic_edge,
-       .eoi            = ack_apic_level,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_ioapic_affinity_irq,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip ir_ioapic_chip __read_mostly = {
-       .name           = "IR-IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_x2apic_edge,
-       .eoi            = ack_x2apic_level,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_ir_ioapic_affinity_irq,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-#endif
-
-static inline void init_IO_APIC_traps(void)
-{
-       int irq;
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-
-       /*
-        * NOTE! The local APIC isn't very good at handling
-        * multiple interrupts at the same interrupt level.
-        * As the interrupt level is determined by taking the
-        * vector number and shifting that right by 4, we
-        * want to spread these out a bit so that they don't
-        * all fall in the same interrupt level.
-        *
-        * Also, we've got to be careful not to trash gate
-        * 0x80, because int 0x80 is hm, kind of importantish. ;)
-        */
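-       /*
-        * (The level is vector >> 4, so e.g. vectors 0x31 and 0x3e
-        * would both land on priority level 3.)
-        */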
-       for_each_irq_desc(irq, desc) {
-               cfg = desc->chip_data;
-               if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
-                       /*
-                        * Hmm.. We don't have an entry for this,
-                        * so default to an old-fashioned 8259
-                        * interrupt if we can..
-                        */
-                       if (irq < NR_IRQS_LEGACY)
-                               make_8259A_irq(irq);
-                       else
-                               /* Strange. Oh, well.. */
-                               desc->chip = &no_irq_chip;
-               }
-       }
-}
-
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void mask_lapic_irq(unsigned int irq)
-{
-       unsigned long v;
-
-       v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
-       unsigned long v;
-
-       v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq(unsigned int irq)
-{
-       ack_APIC_irq();
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
-       .name           = "local-APIC",
-       .mask           = mask_lapic_irq,
-       .unmask         = unmask_lapic_irq,
-       .ack            = ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq, struct irq_desc *desc)
-{
-       desc->status &= ~IRQ_LEVEL;
-       set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
-                                     "edge");
-}
-
-static void __init setup_nmi(void)
-{
-       /*
-        * Dirty trick to enable the NMI watchdog ...
-        * We put the 8259A master into AEOI mode and
-        * unmask on all local APICs LVT0 as NMI.
-        *
-        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-        * is from Maciej W. Rozycki - so we do not have to EOI from
-        * the NMI handler or the timer interrupt.
-        */
-       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-       enable_NMI_through_LVT0();
-
-       apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish, but it's about the only way of sending
- * a few INTA cycles to 8259As and any associated glue logic.  ICR does
- * not support the ExtINT mode, unfortunately.  We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA.  --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
-       int apic, pin, i;
-       struct IO_APIC_route_entry entry0, entry1;
-       unsigned char save_control, save_freq_select;
-
-       pin  = find_isa_irq_pin(8, mp_INT);
-       if (pin == -1) {
-               WARN_ON_ONCE(1);
-               return;
-       }
-       apic = find_isa_irq_apic(8, mp_INT);
-       if (apic == -1) {
-               WARN_ON_ONCE(1);
-               return;
-       }
-
-       entry0 = ioapic_read_entry(apic, pin);
-       clear_IO_APIC_pin(apic, pin);
-
-       memset(&entry1, 0, sizeof(entry1));
-
-       entry1.dest_mode = 0;                   /* physical delivery */
-       entry1.mask = 0;                        /* unmask IRQ now */
-       entry1.dest = hard_smp_processor_id();
-       entry1.delivery_mode = dest_ExtINT;
-       entry1.polarity = entry0.polarity;
-       entry1.trigger = 0;
-       entry1.vector = 0;
-
-       ioapic_write_entry(apic, pin, entry1);
-
-       save_control = CMOS_READ(RTC_CONTROL);
-       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-       CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
-                  RTC_FREQ_SELECT);
-       CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
-       i = 100;
-       while (i-- > 0) {
-               mdelay(10);
-               if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
-                       i -= 10;
-       }
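-       /*
-        * The loop above polls for up to ~1s (100 x 10ms); each RTC
-        * periodic-interrupt flag observed knocks 100ms worth of
-        * iterations off the budget, so a burst of RTC ticks ends
-        * the wait early.
-        */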
-
-       CMOS_WRITE(save_control, RTC_CONTROL);
-       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       clear_IO_APIC_pin(apic, pin);
-
-       ioapic_write_entry(apic, pin, entry0);
-}
-
-static int disable_timer_pin_1 __initdata;
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-       disable_timer_pin_1 = 1;
-       return 0;
-}
-early_param("disable_timer_pin_1", disable_timer_pin_setup);
-
-int timer_through_8259 __initdata;
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
- * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for all platforms.
- */
-static inline void __init check_timer(void)
-{
-       struct irq_desc *desc = irq_to_desc(0);
-       struct irq_cfg *cfg = desc->chip_data;
-       int cpu = boot_cpu_id;
-       int apic1, pin1, apic2, pin2;
-       unsigned long flags;
-       int no_pin1 = 0;
-
-       local_irq_save(flags);
-
-       /*
-        * get/set the timer IRQ vector:
-        */
-       disable_8259A_irq(0);
-       assign_irq_vector(0, cfg, apic->target_cpus());
-
-       /*
-        * As IRQ0 is to be enabled in the 8259A, the virtual
-        * wire has to be disabled in the local APIC.  Also
-        * timer interrupts need to be acknowledged manually in
-        * the 8259A for the i82489DX when using the NMI
-        * watchdog as that APIC treats NMIs as level-triggered.
-        * The AEOI mode will finish them in the 8259A
-        * automatically.
-        */
-       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-       init_8259A(1);
-#ifdef CONFIG_X86_32
-       {
-               unsigned int ver;
-
-               ver = apic_read(APIC_LVR);
-               ver = GET_APIC_VERSION(ver);
-               timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-       }
-#endif
-
-       pin1  = find_isa_irq_pin(0, mp_INT);
-       apic1 = find_isa_irq_apic(0, mp_INT);
-       pin2  = ioapic_i8259.pin;
-       apic2 = ioapic_i8259.apic;
-
-       apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
-                   "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-                   cfg->vector, apic1, pin1, apic2, pin2);
-
-       /*
-        * Some BIOS writers are clueless and report the ExtINTA
-        * I/O APIC input from the cascaded 8259A as the timer
-        * interrupt input.  So just in case, if only one pin
-        * was found above, try it both directly and through the
-        * 8259A.
-        */
-       if (pin1 == -1) {
-#ifdef CONFIG_INTR_REMAP
-               if (intr_remapping_enabled)
-                       panic("BIOS bug: timer not connected to IO-APIC");
-#endif
-               pin1 = pin2;
-               apic1 = apic2;
-               no_pin1 = 1;
-       } else if (pin2 == -1) {
-               pin2 = pin1;
-               apic2 = apic1;
-       }
-
-       if (pin1 != -1) {
-               /*
-                * Ok, does IRQ0 through the IOAPIC work?
-                */
-               if (no_pin1) {
-                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
-                       setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
-               } else {
-                       /* For an edge trigger, setup_IO_APIC_irq() already
-                        * leaves it unmasked, so we only need to unmask it
-                        * if it is level-triggered - but do we really have
-                        * a level-triggered timer?
-                        */
-                       int idx;
-                       idx = find_irq_entry(apic1, pin1, mp_INT);
-                       if (idx != -1 && irq_trigger(idx))
-                               unmask_IO_APIC_irq_desc(desc);
-               }
-               if (timer_irq_works()) {
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               setup_nmi();
-                               enable_8259A_irq(0);
-                       }
-                       if (disable_timer_pin_1 > 0)
-                               clear_IO_APIC_pin(0, pin1);
-                       goto out;
-               }
-#ifdef CONFIG_INTR_REMAP
-               if (intr_remapping_enabled)
-                       panic("timer doesn't work through Interrupt-remapped IO-APIC");
-#endif
-               local_irq_disable();
-               clear_IO_APIC_pin(apic1, pin1);
-               if (!no_pin1)
-                       apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
-                                   "8254 timer not connected to IO-APIC\n");
-
-               apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
-                           "(IRQ0) through the 8259A ...\n");
-               apic_printk(APIC_QUIET, KERN_INFO
-                           "..... (found apic %d pin %d) ...\n", apic2, pin2);
-               /*
-                * legacy devices should be connected to IO APIC #0
-                */
-               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
-               setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-               enable_8259A_irq(0);
-               if (timer_irq_works()) {
-                       apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
-                       timer_through_8259 = 1;
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               disable_8259A_irq(0);
-                               setup_nmi();
-                               enable_8259A_irq(0);
-                       }
-                       goto out;
-               }
-               /*
-                * Cleanup, just in case ...
-                */
-               local_irq_disable();
-               disable_8259A_irq(0);
-               clear_IO_APIC_pin(apic2, pin2);
-               apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
-       }
-
-       if (nmi_watchdog == NMI_IO_APIC) {
-               apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-                           "through the IO-APIC - disabling NMI Watchdog!\n");
-               nmi_watchdog = NMI_NONE;
-       }
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-
-       apic_printk(APIC_QUIET, KERN_INFO
-                   "...trying to set up timer as Virtual Wire IRQ...\n");
-
-       lapic_register_intr(0, desc);
-       apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
-       enable_8259A_irq(0);
-
-       if (timer_irq_works()) {
-               apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-               goto out;
-       }
-       local_irq_disable();
-       disable_8259A_irq(0);
-       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
-       apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
-       apic_printk(APIC_QUIET, KERN_INFO
-                   "...trying to set up timer as ExtINT IRQ...\n");
-
-       init_8259A(0);
-       make_8259A_irq(0);
-       apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
-       unlock_ExtINT_logic();
-
-       if (timer_irq_works()) {
-               apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
-               goto out;
-       }
-       local_irq_disable();
-       apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-       panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
-               "report.  Then try booting with the 'noapic' option.\n");
-out:
-       local_irq_restore(flags);
-}
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices.  However there may be an I/O APIC pin available for
- * this interrupt regardless.  The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A.  In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table.  With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default.  We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor.  Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now.  No actual device should request
- * it anyway.  --macro
- */
-#define PIC_IRQS       (1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
-
-       /*
-        * The call to enable_IO_APIC() has been moved to setup_local_APIC() for the BP.
-        */
-
-       io_apic_irqs = ~PIC_IRQS;
-
-       apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-       /*
-        * Set up IO-APIC IRQ routing.
-        */
-#ifdef CONFIG_X86_32
-       if (!acpi_ioapic)
-               setup_ioapic_ids_from_mpc();
-#endif
-       sync_Arb_IDs();
-       setup_IO_APIC_irqs();
-       init_IO_APIC_traps();
-       check_timer();
-}
-
-/*
- *      Called after all the initialization is done. If we didn't find any
- *      APIC bugs then we can allow the modify fast path.
- */
-
-static int __init io_apic_bug_finalize(void)
-{
-       if (sis_apic_bug == -1)
-               sis_apic_bug = 0;
-       return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
-       struct sys_device dev;
-       struct IO_APIC_route_entry entry[0];
-};
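-/*
- * entry[0] is the old zero-sized-array idiom for a flexible array
- * member: ioapic_init_sysfs() allocates sizeof(struct sys_device)
- * plus one route entry per pin for each IO-APIC.
- */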
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-       struct IO_APIC_route_entry *entry;
-       struct sysfs_ioapic_data *data;
-       int i;
-
-       data = container_of(dev, struct sysfs_ioapic_data, dev);
-       entry = data->entry;
-       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-               *entry = ioapic_read_entry(dev->id, i);
-
-       return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
-       struct IO_APIC_route_entry *entry;
-       struct sysfs_ioapic_data *data;
-       unsigned long flags;
-       union IO_APIC_reg_00 reg_00;
-       int i;
-
-       data = container_of(dev, struct sysfs_ioapic_data, dev);
-       entry = data->entry;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_00.raw = io_apic_read(dev->id, 0);
-       if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
-               reg_00.bits.ID = mp_ioapics[dev->id].apicid;
-               io_apic_write(dev->id, 0, reg_00.raw);
-       }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-       for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-               ioapic_write_entry(dev->id, i, entry[i]);
-
-       return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
-       .name = "ioapic",
-       .suspend = ioapic_suspend,
-       .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
-       struct sys_device * dev;
-       int i, size, error;
-
-       error = sysdev_class_register(&ioapic_sysdev_class);
-       if (error)
-               return error;
-
-       for (i = 0; i < nr_ioapics; i++ ) {
-               size = sizeof(struct sys_device) + nr_ioapic_registers[i]
-                       * sizeof(struct IO_APIC_route_entry);
-               mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-               if (!mp_ioapic_data[i]) {
-                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-                       continue;
-               }
-               dev = &mp_ioapic_data[i]->dev;
-               dev->id = i;
-               dev->cls = &ioapic_sysdev_class;
-               error = sysdev_register(dev);
-               if (error) {
-                       kfree(mp_ioapic_data[i]);
-                       mp_ioapic_data[i] = NULL;
-                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-                       continue;
-               }
-       }
-
-       return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
-/*
- * Dynamic irq allocation and deallocation
- */
-unsigned int create_irq_nr(unsigned int irq_want)
-{
-       /* Allocate an unused irq */
-       unsigned int irq;
-       unsigned int new;
-       unsigned long flags;
-       struct irq_cfg *cfg_new = NULL;
-       int cpu = boot_cpu_id;
-       struct irq_desc *desc_new = NULL;
-
-       irq = 0;
-       if (irq_want < nr_irqs_gsi)
-               irq_want = nr_irqs_gsi;
-
-       spin_lock_irqsave(&vector_lock, flags);
-       for (new = irq_want; new < nr_irqs; new++) {
-               desc_new = irq_to_desc_alloc_cpu(new, cpu);
-               if (!desc_new) {
-                       printk(KERN_INFO "can not get irq_desc for %d\n", new);
-                       continue;
-               }
-               cfg_new = desc_new->chip_data;
-
-               if (cfg_new->vector != 0)
-                       continue;
-               if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
-                       irq = new;
-               break;
-       }
-       spin_unlock_irqrestore(&vector_lock, flags);
-
-       if (irq > 0) {
-               dynamic_irq_init(irq);
-               /* restore it, in case dynamic_irq_init() cleared it */
-               if (desc_new)
-                       desc_new->chip_data = cfg_new;
-       }
-       return irq;
-}
-
-int create_irq(void)
-{
-       unsigned int irq_want;
-       int irq;
-
-       irq_want = nr_irqs_gsi;
-       irq = create_irq_nr(irq_want);
-
-       if (irq == 0)
-               irq = -1;
-
-       return irq;
-}
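-/*
- * Typical (illustrative) caller pattern:
- *
- *     int irq = create_irq();
- *     if (irq < 0)
- *             return irq;     * no free vector *
- *     ...
- *     destroy_irq(irq);
- */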
-
-void destroy_irq(unsigned int irq)
-{
-       unsigned long flags;
-       struct irq_cfg *cfg;
-       struct irq_desc *desc;
-
-       /* store it, in case dynamic_irq_cleanup() clears it */
-       desc = irq_to_desc(irq);
-       cfg = desc->chip_data;
-       dynamic_irq_cleanup(irq);
-       /* connect back irq_cfg */
-       if (desc)
-               desc->chip_data = cfg;
-
-#ifdef CONFIG_INTR_REMAP
-       free_irte(irq);
-#endif
-       spin_lock_irqsave(&vector_lock, flags);
-       __clear_irq_vector(irq, cfg);
-       spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
-       struct irq_cfg *cfg;
-       int err;
-       unsigned dest;
-
-       if (disable_apic)
-               return -ENXIO;
-
-       cfg = irq_cfg(irq);
-       err = assign_irq_vector(irq, cfg, apic->target_cpus());
-       if (err)
-               return err;
-
-       dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
-
-#ifdef CONFIG_INTR_REMAP
-       if (irq_remapped(irq)) {
-               struct irte irte;
-               int ir_index;
-               u16 sub_handle;
-
-               ir_index = map_irq_to_irte_handle(irq, &sub_handle);
-               BUG_ON(ir_index == -1);
-
-               memset(&irte, 0, sizeof(irte));
-
-               irte.present = 1;
-               irte.dst_mode = apic->irq_dest_mode;
-               irte.trigger_mode = 0; /* edge */
-               irte.dlvry_mode = apic->irq_delivery_mode;
-               irte.vector = cfg->vector;
-               irte.dest_id = IRTE_DEST(dest);
-
-               modify_irte(irq, &irte);
-
-               msg->address_hi = MSI_ADDR_BASE_HI;
-               msg->data = sub_handle;
-               msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
-                                 MSI_ADDR_IR_SHV |
-                                 MSI_ADDR_IR_INDEX1(ir_index) |
-                                 MSI_ADDR_IR_INDEX2(ir_index);
-       } else
-#endif
-       {
-               msg->address_hi = MSI_ADDR_BASE_HI;
-               msg->address_lo =
-                       MSI_ADDR_BASE_LO |
-                       ((apic->irq_dest_mode == 0) ?
-                               MSI_ADDR_DEST_MODE_PHYSICAL:
-                               MSI_ADDR_DEST_MODE_LOGICAL) |
-                       ((apic->irq_delivery_mode != dest_LowestPrio) ?
-                               MSI_ADDR_REDIRECTION_CPU:
-                               MSI_ADDR_REDIRECTION_LOWPRI) |
-                       MSI_ADDR_DEST_ID(dest);
-
-               msg->data =
-                       MSI_DATA_TRIGGER_EDGE |
-                       MSI_DATA_LEVEL_ASSERT |
-                       ((apic->irq_delivery_mode != dest_LowestPrio) ?
-                               MSI_DATA_DELIVERY_FIXED:
-                               MSI_DATA_DELIVERY_LOWPRI) |
-                       MSI_DATA_VECTOR(cfg->vector);
-       }
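-       /*
-        * Illustration: physical fixed delivery to APIC ID 1 with
-        * vector 0x41 yields address_lo 0xfee01000 (destination ID
-        * in bits 19:12 of the 0xfeexxxxx doorbell address) and the
-        * vector in the low byte of msg->data.
-        */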
-       return err;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       struct irq_cfg *cfg;
-       struct msi_msg msg;
-       unsigned int dest;
-
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
-
-       cfg = desc->chip_data;
-
-       read_msi_msg_desc(desc, &msg);
-
-       msg.data &= ~MSI_DATA_VECTOR_MASK;
-       msg.data |= MSI_DATA_VECTOR(cfg->vector);
-       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-       write_msi_msg_desc(desc, &msg);
-}
-#ifdef CONFIG_INTR_REMAP
-/*
- * Migrate the MSI irq to another cpumask. This migration is
- * done in the process context using interrupt-remapping hardware.
- */
-static void
-ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       struct irq_cfg *cfg = desc->chip_data;
-       unsigned int dest;
-       struct irte irte;
-
-       if (get_irte(irq, &irte))
-               return;
-
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
-
-       irte.vector = cfg->vector;
-       irte.dest_id = IRTE_DEST(dest);
-
-       /*
-        * atomically update the IRTE with the new destination and vector.
-        */
-       modify_irte(irq, &irte);
-
-       /*
-        * After this point, all the interrupts will start arriving
-        * at the new destination. So, time to clean up the previous
-        * vector allocation.
-        */
-       if (cfg->move_in_progress)
-               send_cleanup_vector(cfg);
-}
-
-#endif
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-       .name           = "PCI-MSI",
-       .unmask         = unmask_msi_irq,
-       .mask           = mask_msi_irq,
-       .ack            = ack_apic_edge,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_msi_irq_affinity,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-
-#ifdef CONFIG_INTR_REMAP
-static struct irq_chip msi_ir_chip = {
-       .name           = "IR-PCI-MSI",
-       .unmask         = unmask_msi_irq,
-       .mask           = mask_msi_irq,
-       .ack            = ack_x2apic_edge,
-#ifdef CONFIG_SMP
-       .set_affinity   = ir_set_msi_irq_affinity,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
-{
-       struct intel_iommu *iommu;
-       int index;
-
-       iommu = map_dev_to_ir(dev);
-       if (!iommu) {
-               printk(KERN_ERR
-                      "Unable to map PCI %s to iommu\n", pci_name(dev));
-               return -ENOENT;
-       }
-
-       index = alloc_irte(iommu, irq, nvec);
-       if (index < 0) {
-               printk(KERN_ERR
-                      "Unable to allocate %d IRTE for PCI %s\n", nvec,
-                      pci_name(dev));
-               return -ENOSPC;
-       }
-       return index;
-}
-#endif
-
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
-{
-       int ret;
-       struct msi_msg msg;
-
-       ret = msi_compose_msg(dev, irq, &msg);
-       if (ret < 0)
-               return ret;
-
-       set_irq_msi(irq, msidesc);
-       write_msi_msg(irq, &msg);
-
-#ifdef CONFIG_INTR_REMAP
-       if (irq_remapped(irq)) {
-               struct irq_desc *desc = irq_to_desc(irq);
-               /*
-                * irq migration in process context
-                */
-               desc->status |= IRQ_MOVE_PCNTXT;
-               set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
-       } else
-#endif
-               set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
-
-       dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
-
-       return 0;
-}
-
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-       unsigned int irq;
-       int ret, sub_handle;
-       struct msi_desc *msidesc;
-       unsigned int irq_want;
-
-#ifdef CONFIG_INTR_REMAP
-       struct intel_iommu *iommu = NULL;
-       int index = 0;
-#endif
-
-       irq_want = nr_irqs_gsi;
-       sub_handle = 0;
-       list_for_each_entry(msidesc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want);
-               if (irq == 0)
-                       return -1;
-               irq_want = irq + 1;
-#ifdef CONFIG_INTR_REMAP
-               if (!intr_remapping_enabled)
-                       goto no_ir;
-
-               if (!sub_handle) {
-                       /*
-                        * allocate the consecutive block of IRTEs
-                        * for 'nvec'
-                        */
-                       index = msi_alloc_irte(dev, irq, nvec);
-                       if (index < 0) {
-                               ret = index;
-                               goto error;
-                       }
-               } else {
-                       iommu = map_dev_to_ir(dev);
-                       if (!iommu) {
-                               ret = -ENOENT;
-                               goto error;
-                       }
-                       /*
-                        * set up the mapping between the irq and the IRTE
-                        * base index, with the sub_handle pointing to the
-                        * appropriate interrupt remap table entry.
-                        */
-                       set_irte_irq(irq, iommu, index, sub_handle);
-               }
-no_ir:
-#endif
-               ret = setup_msi_irq(dev, msidesc, irq);
-               if (ret < 0)
-                       goto error;
-               sub_handle++;
-       }
-       return 0;
-
-error:
-       destroy_irq(irq);
-       return ret;
-}
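
To make the sub_handle bookkeeping above concrete, a sketch (illustrative, not a drop-in) of what the loop produces for a device requesting nvec = 4 with interrupt remapping enabled: one block of four consecutive IRTEs is allocated on the first pass, and every later irq is tied to the same base index with an increasing sub_handle:

/* Sketch of the remapped multi-MSI layout for nvec = 4 (illustrative;
 * dev/iommu/first_irq stand for the values used in the loop above). */
int sub_handle, index = -1;

for (sub_handle = 0; sub_handle < 4; sub_handle++) {
        unsigned int irq = first_irq + sub_handle;      /* consecutive irqs */

        if (!sub_handle)
                index = msi_alloc_irte(dev, irq, 4);    /* 4 consecutive IRTEs */
        else
                set_irte_irq(irq, iommu, index, sub_handle); /* irq -> IRTE index + sub_handle */
}
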
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
-       destroy_irq(irq);
-}
-
-#ifdef CONFIG_DMAR
-#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       struct irq_cfg *cfg;
-       struct msi_msg msg;
-       unsigned int dest;
-
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
-
-       cfg = desc->chip_data;
-
-       dmar_msi_read(irq, &msg);
-
-       msg.data &= ~MSI_DATA_VECTOR_MASK;
-       msg.data |= MSI_DATA_VECTOR(cfg->vector);
-       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-       dmar_msi_write(irq, &msg);
-}
-
-#endif /* CONFIG_SMP */
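
Both this helper and the HPET variant below rewrite the same two fields of a live MSI message. Per the Intel SDM, the vector sits in bits 7:0 of the MSI data register and the destination APIC ID in bits 19:12 of the address register; a self-contained sketch of that read-modify-write (the mask names here are illustrative, not the kernel's msidef.h macros):

#include <stdint.h>

#define VEC_MASK        0x000000ffu     /* MSI data[7:0]:   vector       */
#define DEST_MASK       0x000ff000u     /* MSI addr[19:12]: dest APIC ID */

static void msi_retarget(uint32_t *data, uint32_t *addr_lo,
                         uint8_t vector, uint8_t dest)
{
        *data    = (*data    & ~VEC_MASK)  | vector;
        *addr_lo = (*addr_lo & ~DEST_MASK) | ((uint32_t)dest << 12);
}
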
-
-struct irq_chip dmar_msi_type = {
-       .name = "DMAR_MSI",
-       .unmask = dmar_msi_unmask,
-       .mask = dmar_msi_mask,
-       .ack = ack_apic_edge,
-#ifdef CONFIG_SMP
-       .set_affinity = dmar_msi_set_affinity,
-#endif
-       .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-       int ret;
-       struct msi_msg msg;
-
-       ret = msi_compose_msg(NULL, irq, &msg);
-       if (ret < 0)
-               return ret;
-       dmar_msi_write(irq, &msg);
-       set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-               "edge");
-       return 0;
-}
-#endif
-
-#ifdef CONFIG_HPET_TIMER
-
-#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       struct irq_cfg *cfg;
-       struct msi_msg msg;
-       unsigned int dest;
-
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
-
-       cfg = desc->chip_data;
-
-       hpet_msi_read(irq, &msg);
-
-       msg.data &= ~MSI_DATA_VECTOR_MASK;
-       msg.data |= MSI_DATA_VECTOR(cfg->vector);
-       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-       hpet_msi_write(irq, &msg);
-}
-
-#endif /* CONFIG_SMP */
-
-struct irq_chip hpet_msi_type = {
-       .name = "HPET_MSI",
-       .unmask = hpet_msi_unmask,
-       .mask = hpet_msi_mask,
-       .ack = ack_apic_edge,
-#ifdef CONFIG_SMP
-       .set_affinity = hpet_msi_set_affinity,
-#endif
-       .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_hpet_msi(unsigned int irq)
-{
-       int ret;
-       struct msi_msg msg;
-
-       ret = msi_compose_msg(NULL, irq, &msg);
-       if (ret < 0)
-               return ret;
-
-       hpet_msi_write(irq, &msg);
-       set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
-               "edge");
-
-       return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-       struct ht_irq_msg msg;
-       fetch_ht_irq_msg(irq, &msg);
-
-       msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-       msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-       msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-       msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-       write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       struct irq_cfg *cfg;
-       unsigned int dest;
-
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
-
-       cfg = desc->chip_data;
-
-       target_ht_irq(irq, dest, cfg->vector);
-}
-
-#endif
-
-static struct irq_chip ht_irq_chip = {
-       .name           = "PCI-HT",
-       .mask           = mask_ht_irq,
-       .unmask         = unmask_ht_irq,
-       .ack            = ack_apic_edge,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_ht_irq_affinity,
-#endif
-       .retrigger      = ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-       struct irq_cfg *cfg;
-       int err;
-
-       if (disable_apic)
-               return -ENXIO;
-
-       cfg = irq_cfg(irq);
-       err = assign_irq_vector(irq, cfg, apic->target_cpus());
-       if (!err) {
-               struct ht_irq_msg msg;
-               unsigned dest;
-
-               dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-                                                   apic->target_cpus());
-
-               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-               msg.address_lo =
-                       HT_IRQ_LOW_BASE |
-                       HT_IRQ_LOW_DEST_ID(dest) |
-                       HT_IRQ_LOW_VECTOR(cfg->vector) |
-                       ((apic->irq_dest_mode == 0) ?
-                               HT_IRQ_LOW_DM_PHYSICAL :
-                               HT_IRQ_LOW_DM_LOGICAL) |
-                       HT_IRQ_LOW_RQEOI_EDGE |
-                       ((apic->irq_delivery_mode != dest_LowestPrio) ?
-                               HT_IRQ_LOW_MT_FIXED :
-                               HT_IRQ_LOW_MT_ARBITRATED) |
-                       HT_IRQ_LOW_IRQ_MASKED;
-
-               write_ht_irq_msg(irq, &msg);
-
-               set_irq_chip_and_handler_name(irq, &ht_irq_chip,
-                                             handle_edge_irq, "edge");
-
-               dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-       }
-       return err;
-}
-#endif /* CONFIG_HT_IRQ */
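
The address_lo word composed in arch_setup_ht_irq() packs two policy choices next to the vector and destination; a comment-style summary of the variable parts (the HT_IRQ_LOW_* macro values live in linux/htirq.h and are not reproduced here):

/*
 * Variable parts of address_lo in arch_setup_ht_irq() (summary):
 *
 *   apic->irq_dest_mode == 0    -> HT_IRQ_LOW_DM_PHYSICAL  (dest = APIC ID)
 *   otherwise                   -> HT_IRQ_LOW_DM_LOGICAL   (dest = logical mask)
 *
 *   delivery != dest_LowestPrio -> HT_IRQ_LOW_MT_FIXED      (fixed target)
 *   delivery == dest_LowestPrio -> HT_IRQ_LOW_MT_ARBITRATED (lowest priority)
 *
 * The irq starts out HT_IRQ_LOW_IRQ_MASKED and is unmasked later
 * through the ht_irq_chip ->unmask hook.
 */
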
-
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-                      unsigned long mmr_offset)
-{
-       const struct cpumask *eligible_cpu = cpumask_of(cpu);
-       struct irq_cfg *cfg;
-       int mmr_pnode;
-       unsigned long mmr_value;
-       struct uv_IO_APIC_route_entry *entry;
-       unsigned long flags;
-       int err;
-
-       cfg = irq_cfg(irq);
-
-       err = assign_irq_vector(irq, cfg, eligible_cpu);
-       if (err != 0)
-               return err;
-
-       spin_lock_irqsave(&vector_lock, flags);
-       set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
-                                     irq_name);
-       spin_unlock_irqrestore(&vector_lock, flags);
-
-       mmr_value = 0;
-       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-       entry->vector = cfg->vector;
-       entry->delivery_mode = apic->irq_delivery_mode;
-       entry->dest_mode = apic->irq_dest_mode;
-       entry->polarity = 0;
-       entry->trigger = 0;
-       entry->mask = 0;
-       entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
-       mmr_pnode = uv_blade_to_pnode(mmr_blade);
-       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-       return irq;
-}
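
The mmr_value/entry aliasing above is the usual trick of overlaying a bitfield struct on a 64-bit MMR so the whole route entry is written in one store. A standalone sketch of the pattern (the field widths here are illustrative, not the real uv_IO_APIC_route_entry layout):

#include <stdint.h>
#include <assert.h>

struct route_entry {                    /* illustrative layout only */
        uint64_t vector        :  8;
        uint64_t delivery_mode :  3;
        uint64_t dest_mode     :  1;
        uint64_t polarity      :  1;
        uint64_t trigger       :  1;
        uint64_t mask          :  1;
        uint64_t reserved      : 17;
        uint64_t dest          : 32;
};

static uint64_t compose_mmr(uint8_t vector, uint32_t apicid)
{
        uint64_t mmr_value = 0;
        struct route_entry *entry = (struct route_entry *)&mmr_value;

        static_assert(sizeof(*entry) == sizeof(uint64_t),
                      "entry must overlay the 64-bit MMR exactly");
        entry->vector = vector;
        entry->dest   = apicid;
        return mmr_value;               /* one 64-bit write to the MMR */
}
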
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * no longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
-{
-       unsigned long mmr_value;
-       struct uv_IO_APIC_route_entry *entry;
-       int mmr_pnode;
-
-       mmr_value = 0;
-       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-       entry->mask = 1;
-
-       mmr_pnode = uv_blade_to_pnode(mmr_blade);
-       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-#endif /* CONFIG_X86_UV */
-
-int __init io_apic_get_redir_entries(int ioapic)
-{
-       union IO_APIC_reg_01    reg_01;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return reg_01.bits.entries;
-}
-
-void __init probe_nr_irqs_gsi(void)
-{
-       int nr = 0;
-
-       nr = acpi_probe_gsi();
-       if (nr > nr_irqs_gsi) {
-               nr_irqs_gsi = nr;
-       } else {
-               /* for acpi=off, or when ACPI is not compiled in */
-               int idx;
-
-               nr = 0;
-               for (idx = 0; idx < nr_ioapics; idx++)
-                       nr += io_apic_get_redir_entries(idx) + 1;
-
-               if (nr > nr_irqs_gsi)
-                       nr_irqs_gsi = nr;
-       }
-
-       printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
-}
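
A worked example of the fallback arm, assuming ACPI is off and two IO-APICs that each report 23 as their highest redirection-entry index:

/* Worked fallback example (assumed topology: two 24-entry IO-APICs). */
static int probe_gsi_fallback(void)
{
        int nr = 0;

        /* io_apic_get_redir_entries() returns the highest index, hence +1 */
        nr += 23 + 1;           /* IO-APIC 0: entries 0..23 */
        nr += 23 + 1;           /* IO-APIC 1: entries 0..23 */
        return nr;              /* 48 -> nr_irqs_gsi        */
}
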
-
-#ifdef CONFIG_SPARSE_IRQ
-int __init arch_probe_nr_irqs(void)
-{
-       int nr;
-
-       if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-               nr_irqs = NR_VECTORS * nr_cpu_ids;
-
-       nr = nr_irqs_gsi + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
-       /*
-        * for MSI and HT dyn irq
-        */
-       nr += nr_irqs_gsi * 16;
-#endif
-       if (nr < nr_irqs)
-               nr_irqs = nr;
-
-       return 0;
-}
-#endif
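
Plugging representative numbers into the sizing rule above (the figures are assumptions, not from this patch): with nr_cpu_ids = 8, nr_irqs_gsi = 48, and MSI/HT configured in:

/* Standalone restatement of arch_probe_nr_irqs() sizing (illustrative). */
static int probe_nr_irqs_sketch(int nr_irqs_gsi, int nr_cpu_ids)
{
        int nr = nr_irqs_gsi + 8 * nr_cpu_ids;  /* GSIs + per-cpu slack    */

        nr += nr_irqs_gsi * 16;                 /* MSI and HT dynamic irqs */
        return nr;                              /* 48 + 64 + 768 = 880     */
}
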
-
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-#ifdef CONFIG_X86_32
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
-{
-       union IO_APIC_reg_00 reg_00;
-       static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
-       physid_mask_t tmp;
-       unsigned long flags;
-       int i = 0;
-
-       /*
-        * The P4 platform supports up to 256 APIC IDs on two separate APIC
-        * buses (one for LAPICs, one for IOAPICs), where its predecessors only
-        * support up to 16 on one shared APIC bus.
-        *
-        * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-        *      advantage of new APIC bus architecture.
-        */
-
-       if (physids_empty(apic_id_map))
-               apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_00.raw = io_apic_read(ioapic, 0);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       if (apic_id >= get_physical_broadcast()) {
-               printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-                       "%d\n", ioapic, apic_id, reg_00.bits.ID);
-               apic_id = reg_00.bits.ID;
-       }
-
-       /*
-        * Every APIC in a system must have a unique ID or we get lots of nice
-        * 'stuck on smp_invalidate_needed IPI wait' messages.
-        */
-       if (apic->check_apicid_used(apic_id_map, apic_id)) {
-
-               for (i = 0; i < get_physical_broadcast(); i++) {
-                       if (!apic->check_apicid_used(apic_id_map, i))
-                               break;
-               }
-
-               if (i == get_physical_broadcast())
-                       panic("Max apic_id exceeded!\n");
-
-               printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-                       "trying %d\n", ioapic, apic_id, i);
-
-               apic_id = i;
-       }
-
-       tmp = apic->apicid_to_cpu_present(apic_id);
-       physids_or(apic_id_map, apic_id_map, tmp);
-
-       if (reg_00.bits.ID != apic_id) {
-               reg_00.bits.ID = apic_id;
-
-               spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(ioapic, 0, reg_00.raw);
-               reg_00.raw = io_apic_read(ioapic, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-
-               /* Sanity check */
-               if (reg_00.bits.ID != apic_id) {
-                       printk(KERN_ERR "IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
-                       return -1;
-               }
-       }
-
-       apic_printk(APIC_VERBOSE, KERN_INFO
-                       "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
-       return apic_id;
-}
-
-int __init io_apic_get_version(int ioapic)
-{
-       union IO_APIC_reg_01    reg_01;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return reg_01.bits.version;
-}
-#endif
-
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
-{
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
-
-       if (!IO_APIC_IRQ(irq)) {
-               apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-                       ioapic);
-               return -EINVAL;
-       }
-
-       desc = irq_to_desc_alloc_cpu(irq, cpu);
-       if (!desc) {
-               printk(KERN_INFO "cannot get irq_desc %d\n", irq);
-               return 0;
-       }
-
-       /*
-        * IRQs < 16 are already in the irq_2_pin[] map
-        */
-       if (irq >= NR_IRQS_LEGACY) {
-               cfg = desc->chip_data;
-               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-       }
-
-       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-       return 0;
-}
-
-
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
-{
-       int i;
-
-       if (skip_ioapic_setup)
-               return -1;
-
-       for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].irqtype == mp_INT &&
-                   mp_irqs[i].srcbusirq == bus_irq)
-                       break;
-       if (i >= mp_irq_entries)
-               return -1;
-
-       *trigger = irq_trigger(i);
-       *polarity = irq_polarity(i);
-       return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-/*
- * This function is currently only a helper for the i386 SMP boot process, where
- * we need to reprogram the ioredtbls to cater for the CPUs which have come online,
- * so the mask in all cases should simply be apic->target_cpus().
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-       int pin, ioapic, irq, irq_entry;
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-       const struct cpumask *mask;
-
-       if (skip_ioapic_setup == 1)
-               return;
-
-       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-                       if (irq_entry == -1)
-                               continue;
-                       irq = pin_2_irq(irq_entry, ioapic, pin);
-
-                       /* setup_IO_APIC_irqs() could fail to get a vector for some device
-                        * when there are too many devices, because at that time only the
-                        * boot cpu is online.
-                        */
-                       desc = irq_to_desc(irq);
-                       cfg = desc->chip_data;
-                       if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
-                                                 irq_trigger(irq_entry),
-                                                 irq_polarity(irq_entry));
-                               continue;
-
-                       }
-
-                       /*
-                        * Honour affinities which have been set in early boot
-                        */
-                       if (desc->status &
-                           (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = desc->affinity;
-                       else
-                               mask = apic->target_cpus();
-
-#ifdef CONFIG_INTR_REMAP
-                       if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq_desc(desc, mask);
-                       else
-#endif
-                               set_ioapic_affinity_irq_desc(desc, mask);
-               }
-
-       }
-}
-#endif
-
-#define IOAPIC_RESOURCE_NAME_SIZE 11
-
-static struct resource *ioapic_resources;
-
-static struct resource * __init ioapic_setup_resources(void)
-{
-       unsigned long n;
-       struct resource *res;
-       char *mem;
-       int i;
-
-       if (nr_ioapics <= 0)
-               return NULL;
-
-       n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
-       n *= nr_ioapics;
-
-       mem = alloc_bootmem(n);
-       res = (void *)mem;
-
-       if (mem != NULL) {
-               mem += sizeof(struct resource) * nr_ioapics;
-
-               for (i = 0; i < nr_ioapics; i++) {
-                       res[i].name = mem;
-                       res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-                       sprintf(mem,  "IOAPIC %u", i);
-                       mem += IOAPIC_RESOURCE_NAME_SIZE;
-               }
-       }
-
-       ioapic_resources = res;
-
-       return res;
-}
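
The single alloc_bootmem() call above carves one block into an array of struct resource followed by fixed-size name buffers that the array points into. A userspace sketch of the same layout for nr_ioapics = 2:

#include <stdio.h>
#include <stdlib.h>

#define NAME_SZ 11                      /* mirrors IOAPIC_RESOURCE_NAME_SIZE */

struct res { const char *name; };       /* stand-in for struct resource */

int main(void)
{
        int i, n = 2;                   /* nr_ioapics (illustrative)  */
        /* one block: n structs, then n fixed-size name slots        */
        char *mem = calloc(n, sizeof(struct res) + NAME_SZ);
        struct res *res = (struct res *)mem;

        mem += sizeof(struct res) * n;  /* names live after the array */
        for (i = 0; i < n; i++) {
                res[i].name = mem;
                snprintf(mem, NAME_SZ, "IOAPIC %u", (unsigned)i);
                mem += NAME_SZ;
        }
        printf("%s, %s\n", res[0].name, res[1].name);
        return 0;
}
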
-
-void __init ioapic_init_mappings(void)
-{
-       unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-       struct resource *ioapic_res;
-       int i;
-
-       ioapic_res = ioapic_setup_resources();
-       for (i = 0; i < nr_ioapics; i++) {
-               if (smp_found_config) {
-                       ioapic_phys = mp_ioapics[i].apicaddr;
-#ifdef CONFIG_X86_32
-                       if (!ioapic_phys) {
-                               printk(KERN_ERR
-                                      "WARNING: bogus zero IO-APIC "
-                                      "address found in MPTABLE, "
-                                      "disabling IO/APIC support!\n");
-                               smp_found_config = 0;
-                               skip_ioapic_setup = 1;
-                               goto fake_ioapic_page;
-                       }
-#endif
-               } else {
-#ifdef CONFIG_X86_32
-fake_ioapic_page:
-#endif
-                       ioapic_phys = (unsigned long)
-                               alloc_bootmem_pages(PAGE_SIZE);
-                       ioapic_phys = __pa(ioapic_phys);
-               }
-               set_fixmap_nocache(idx, ioapic_phys);
-               apic_printk(APIC_VERBOSE,
-                           "mapped IOAPIC to %08lx (%08lx)\n",
-                           __fix_to_virt(idx), ioapic_phys);
-               idx++;
-
-               if (ioapic_res != NULL) {
-                       ioapic_res->start = ioapic_phys;
-                       ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-                       ioapic_res++;
-               }
-       }
-}
-
-static int __init ioapic_insert_resources(void)
-{
-       int i;
-       struct resource *r = ioapic_resources;
-
-       if (!r) {
-               printk(KERN_ERR
-                      "IO APIC resources could be not be allocated.\n");
-               return -1;
-       }
-
-       for (i = 0; i < nr_ioapics; i++) {
-               insert_resource(&iomem_resource, r);
-               r++;
-       }
-
-       return 0;
-}
-
-/* Insert the IO APIC resources after PCI initialization has occurred to handle
- * IO APICs that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
deleted file mode 100644 (file)
index dbf5445..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/cpu.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/apic.h>
-#include <asm/proto.h>
-#include <asm/ipi.h>
-
-void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
-{
-       unsigned long query_cpu;
-       unsigned long flags;
-
-       /*
-        * Hack. The clustered APIC addressing mode doesn't allow us to send
-        * to an arbitrary mask, so I do a unicast to each CPU instead.
-        * - mbligh
-        */
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask) {
-               __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
-                               query_cpu), vector, APIC_DEST_PHYSICAL);
-       }
-       local_irq_restore(flags);
-}
-
-void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
-                                                int vector)
-{
-       unsigned int this_cpu = smp_processor_id();
-       unsigned int query_cpu;
-       unsigned long flags;
-
-       /* See Hack comment above */
-
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask) {
-               if (query_cpu == this_cpu)
-                       continue;
-               __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
-                                query_cpu), vector, APIC_DEST_PHYSICAL);
-       }
-       local_irq_restore(flags);
-}
-
-void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
-                                                int vector)
-{
-       unsigned long flags;
-       unsigned int query_cpu;
-
-       /*
-        * Hack. The clustered APIC addressing mode doesn't allow us to send
-        * to an arbitrary mask, so I do a unicast to each CPU instead. This
-        * should be modified to do one message per cluster ID - mbligh
-        */
-
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask)
-               __default_send_IPI_dest_field(
-                       apic->cpu_to_logical_apicid(query_cpu), vector,
-                       apic->dest_logical);
-       local_irq_restore(flags);
-}
-
-void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
-                                                int vector)
-{
-       unsigned long flags;
-       unsigned int query_cpu;
-       unsigned int this_cpu = smp_processor_id();
-
-       /* See Hack comment above */
-
-       local_irq_save(flags);
-       for_each_cpu(query_cpu, mask) {
-               if (query_cpu == this_cpu)
-                       continue;
-               __default_send_IPI_dest_field(
-                       apic->cpu_to_logical_apicid(query_cpu), vector,
-                       apic->dest_logical);
-       }
-       local_irq_restore(flags);
-}
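
None of these helpers is called directly by generic code; each apic driver routes its mask-IPI hooks to the matching variant. A sketch of that wiring (abbreviated; the real struct apic carries many more callbacks, and physflat is just one driver that uses the *_phys pair):

static void physflat_send_IPI_mask_sketch(const struct cpumask *mask,
                                          int vector)
{
        default_send_IPI_mask_sequence_phys(mask, vector);
}

static struct apic apic_physflat_sketch = {
        .name          = "physical flat (sketch)",
        .send_IPI_mask = physflat_send_IPI_mask_sketch,
        /* ... */
};
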
-
-#ifdef CONFIG_X86_32
-
-/*
- * This is only used on smaller machines.
- */
-void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
-{
-       unsigned long mask = cpumask_bits(cpumask)[0];
-       unsigned long flags;
-
-       local_irq_save(flags);
-       WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
-       __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
-       local_irq_restore(flags);
-}
-
-void default_send_IPI_allbutself(int vector)
-{
-       /*
-        * If there are no other CPUs in the system then we would get an APIC send
-        * error if we tried to broadcast, so avoid sending IPIs in this case.
-        */
-       if (num_online_cpus() <= 1)
-               return;
-
-       __default_local_send_IPI_allbutself(vector);
-}
-
-void default_send_IPI_all(int vector)
-{
-       __default_local_send_IPI_all(vector);
-}
-
-void default_send_IPI_self(int vector)
-{
-       __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
-}
-
-/* must come after the send_IPI functions above for inlining */
-static int convert_apicid_to_cpu(int apic_id)
-{
-       int i;
-
-       for_each_possible_cpu(i) {
-               if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
-                       return i;
-       }
-       return -1;
-}
-
-int safe_smp_processor_id(void)
-{
-       int apicid, cpuid;
-
-       if (!boot_cpu_has(X86_FEATURE_APIC))
-               return 0;
-
-       apicid = hard_smp_processor_id();
-       if (apicid == BAD_APICID)
-               return 0;
-
-       cpuid = convert_apicid_to_cpu(apicid);
-
-       return cpuid >= 0 ? cpuid : 0;
-}
-#endif
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
deleted file mode 100644 (file)
index bdfad80..0000000
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- *  NMI watchdog support on APIC systems
- *
- *  Started by Ingo Molnar <mingo@redhat.com>
- *
- *  Fixes:
- *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson  : Pentium 4 support for local APIC NMI watchdog.
- *  Pavel Machek and
- *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-static cpumask_t backtrace_mask = CPU_MASK_NONE;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- *     be enabled
- *  0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0);          /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
-       return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-       return atomic_read(&mce_entry) > 0;
-#endif
-       return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active when we have highres/dyntick on.
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
-       return per_cpu(irq_stat, cpu).apic_timer_irqs +
-               per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
-       local_irq_enable_in_hardirq();
-       /*
-        * Intentionally don't use cpu_relax here. This is
-        * to make sure that the performance counter really ticks,
-        * even if there is a simulator or similar that catches the
-        * pause instruction. On a real HT machine this is fine because
-        * all other CPUs are busy with "useless" delay loops and don't
-        * care if they get somewhat fewer cycles.
-        */
-       while (endflag == 0)
-               mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, int *prev_nmi_count)
-{
-       printk(KERN_CONT "\n");
-
-       printk(KERN_WARNING
-               "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
-                       cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
-       printk(KERN_WARNING
-               "Please report this to bugzilla.kernel.org,\n");
-       printk(KERN_WARNING
-               "and attach the output of the 'dmesg' command.\n");
-
-       per_cpu(wd_enabled, cpu) = 0;
-       atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
-       unsigned int *prev_nmi_count;
-       int cpu;
-
-       if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
-               return 0;
-
-       prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
-       if (!prev_nmi_count)
-               goto error;
-
-       printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
-       for_each_possible_cpu(cpu)
-               prev_nmi_count[cpu] = get_nmi_count(cpu);
-       local_irq_enable();
-       mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
-       for_each_online_cpu(cpu) {
-               if (!per_cpu(wd_enabled, cpu))
-                       continue;
-               if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
-                       report_broken_nmi(cpu, prev_nmi_count);
-       }
-       endflag = 1;
-       if (!atomic_read(&nmi_active)) {
-               kfree(prev_nmi_count);
-               atomic_set(&nmi_active, -1);
-               goto error;
-       }
-       printk("OK.\n");
-
-       /*
-        * now that we know it works we can reduce NMI frequency to
-        * something more reasonable; makes a difference in some configs
-        */
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               nmi_hz = lapic_adjust_nmi_hz(1);
-
-       kfree(prev_nmi_count);
-       return 0;
-error:
-       if (nmi_watchdog == NMI_IO_APIC) {
-               if (!timer_through_8259)
-                       disable_8259A_irq(0);
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-       }
-
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-       return -1;
-}
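
The pass/fail test above is plain counting: the mdelay() lets roughly 20 watchdog periods elapse, so a healthy CPU should accumulate about 20 NMIs, and anything at or below 5 is declared stuck. Distilled:

/* Distilled health check from check_nmi_watchdog() (thresholds from above). */
static int cpu_watchdog_ok(unsigned int nmi_before, unsigned int nmi_after)
{
        /* ~20 NMI periods elapsed; <= 5 observed means the NMI is stuck */
        return (nmi_after - nmi_before) > 5;
}
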
-
-static int __init setup_nmi_watchdog(char *str)
-{
-       unsigned int nmi;
-
-       if (!strncmp(str, "panic", 5)) {
-               panic_on_timeout = 1;
-               str = strchr(str, ',');
-               if (!str)
-                       return 1;
-               ++str;
-       }
-
-       if (!strncmp(str, "lapic", 5))
-               nmi_watchdog = NMI_LOCAL_APIC;
-       else if (!strncmp(str, "ioapic", 6))
-               nmi_watchdog = NMI_IO_APIC;
-       else {
-               get_option(&str, &nmi);
-               if (nmi >= NMI_INVALID)
-                       return 0;
-               nmi_watchdog = nmi;
-       }
-
-       return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       nmi_pm_active = atomic_read(&nmi_active);
-       stop_apic_nmi_watchdog(NULL);
-       BUG_ON(atomic_read(&nmi_active) != 0);
-       return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       if (nmi_pm_active > 0) {
-               setup_apic_nmi_watchdog(NULL);
-               touch_nmi_watchdog();
-       }
-       return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
-       .name           = "lapic_nmi",
-       .resume         = lapic_nmi_resume,
-       .suspend        = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
-       .id     = 0,
-       .cls    = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
-       int error;
-
-       /*
-        * This should really be a BUG_ON, but because this is an
-        * init call it just doesn't work.  -dcz
-        */
-       if (nmi_watchdog != NMI_LOCAL_APIC)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0)
-               return 0;
-
-       error = sysdev_class_register(&nmi_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic_nmi);
-       return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon as the LAPIC NMI watchdog driver has everything
- * in place and is ready to check whether the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
-       __get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
-       if (__get_cpu_var(wd_enabled))
-               return;
-
-       /* cheap hack to support suspend/resume */
-       /* if cpu0 is not active, neither should the other cpus be */
-       if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
-               return;
-
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               if (lapic_watchdog_init(nmi_hz) < 0) {
-                       __get_cpu_var(wd_enabled) = 0;
-                       return;
-               }
-               /* FALL THROUGH */
-       case NMI_IO_APIC:
-               __get_cpu_var(wd_enabled) = 1;
-               atomic_inc(&nmi_active);
-       }
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
-       /* only support LOCAL and IO APICs for now */
-       if (!nmi_watchdog_active())
-               return;
-       if (__get_cpu_var(wd_enabled) == 0)
-               return;
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               lapic_watchdog_stop();
-       else
-               __acpi_nmi_disable(NULL);
-       __get_cpu_var(wd_enabled) = 0;
-       atomic_dec(&nmi_active);
-}
-
-/*
- * The best way to detect whether a CPU has a 'hard lockup' problem
- * is to check its local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * As these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * Since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(local_t, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
-       if (nmi_watchdog_active()) {
-               unsigned cpu;
-
-               /*
-                * Tell other CPUs to reset their alert counters. We cannot
-                * do it ourselves because the alert count increase is not
-                * atomic.
-                */
-               for_each_present_cpu(cpu) {
-                       if (per_cpu(nmi_touch, cpu) != 1)
-                               per_cpu(nmi_touch, cpu) = 1;
-               }
-       }
-
-       /*
-        * Tickle the softlockup detector too:
-        */
-       touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
-       /*
-        * Since current_thread_info() is always on the stack, and we
-        * always switch the stack NMI-atomically, it's safe to use
-        * smp_processor_id().
-        */
-       unsigned int sum;
-       int touched = 0;
-       int cpu = smp_processor_id();
-       int rc = 0;
-
-       /* check for other users first */
-       if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-                       == NOTIFY_STOP) {
-               rc = 1;
-               touched = 1;
-       }
-
-       sum = get_timer_irqs(cpu);
-
-       if (__get_cpu_var(nmi_touch)) {
-               __get_cpu_var(nmi_touch) = 0;
-               touched = 1;
-       }
-
-       if (cpu_isset(cpu, backtrace_mask)) {
-               static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
-
-               spin_lock(&lock);
-               printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
-               dump_stack();
-               spin_unlock(&lock);
-               cpu_clear(cpu, backtrace_mask);
-       }
-
-       /* Could check oops_in_progress here too, but it's safer not to */
-       if (mce_in_progress())
-               touched = 1;
-
-       /* if none of the timers is firing, this cpu isn't doing much */
-       if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-               /*
-                * Ayiee, looks like this CPU is stuck ...
-                * wait a few IRQs (5 seconds) before doing the oops ...
-                */
-               local_inc(&__get_cpu_var(alert_counter));
-               if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
-                       /*
-                        * die_nmi will return ONLY if NOTIFY_STOP happens..
-                        */
-                       die_nmi("BUG: NMI Watchdog detected LOCKUP",
-                               regs, panic_on_timeout);
-       } else {
-               __get_cpu_var(last_irq_sum) = sum;
-               local_set(&__get_cpu_var(alert_counter), 0);
-       }
-
-       /* see if the nmi watchdog went off */
-       if (!__get_cpu_var(wd_enabled))
-               return rc;
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               rc |= lapic_wd_event(nmi_hz);
-               break;
-       case NMI_IO_APIC:
-               /*
-                * We don't know how to accurately check for this,
-                * so just assume it was a watchdog timer interrupt.
-                * This matches the old behaviour.
-                */
-               rc = 1;
-               break;
-       }
-       return rc;
-}
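
The lockup heuristic above reduces to: if this CPU's timer-interrupt sum has not moved for 5 * nmi_hz consecutive NMIs (about five seconds), declare a hard lockup. A distilled sketch (the real code keeps these counters in per-cpu variables):

static unsigned int last_sum;   /* per-cpu last_irq_sum in the real code */
static unsigned int alert;      /* per-cpu alert_counter                 */

static int lockup_detected(unsigned int sum, int touched, unsigned int nmi_hz)
{
        if (!touched && sum == last_sum)
                return ++alert == 5 * nmi_hz;   /* ~5 s without timer irqs */

        last_sum = sum;
        alert    = 0;
        return 0;
}
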
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
-       __get_cpu_var(wd_enabled) = 1;
-       atomic_inc(&nmi_active);
-       __acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
-       touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
-       unknown_nmi_panic = 1;
-       return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-       unsigned char reason = get_nmi_reason();
-       char buf[64];
-
-       sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-       die_nmi(buf, regs, 1); /* Always panic here */
-       return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi_watchdog
- */
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
-                       void __user *buffer, size_t *length, loff_t *ppos)
-{
-       int old_state;
-
-       nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
-       old_state = nmi_watchdog_enabled;
-       proc_dointvec(table, write, file, buffer, length, ppos);
-       if (!!old_state == !!nmi_watchdog_enabled)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
-               printk(KERN_WARNING
-                       "NMI watchdog is permanently disabled\n");
-               return -EIO;
-       }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_lapic_nmi_watchdog();
-               else
-                       disable_lapic_nmi_watchdog();
-       } else if (nmi_watchdog == NMI_IO_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_ioapic_nmi_watchdog();
-               else
-                       disable_ioapic_nmi_watchdog();
-       } else {
-               printk(KERN_WARNING
-                       "NMI watchdog doesn't know what hardware to touch\n");
-               return -EIO;
-       }
-       return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-       if (unknown_nmi_panic)
-               return unknown_nmi_panic_callback(regs, cpu);
-#endif
-       return 0;
-}
-
-void __trigger_all_cpu_backtrace(void)
-{
-       int i;
-
-       backtrace_mask = cpu_online_map;
-       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
-       for (i = 0; i < 10 * 1000; i++) {
-               if (cpus_empty(backtrace_mask))
-                       break;
-               mdelay(1);
-       }
-}