a work-around for a number of buggy BIOSes. Switch this option on if
your computer crashes instead of powering off properly.
+CONFIG_X86_MCE
+ Machine Check Exception support allows the processor to notify the
+ kernel if it detects a problem (e.g. overheating, component failure).
+ The action the kernel takes depends on the severity of the problem,
+ ranging from a warning message on the console, to halting the machine.
+ Your processor must be a Pentium or newer to support this - check the
+ flags in /proc/cpuinfo for mce. Note that some older Pentium systems
+ have a design flaw which leads to false MCE events - hence MCE is
+ disabled on all P5 processors, unless explicitly enabled with "mce"
+ as a boot argument. Similarly, if MCE is built in and creates a
+ problem on some new non-standard machine, you can boot with "nomce"
+ to disable it. MCE support simply ignores non-MCE processors like
+ the 386 and 486, so nearly everyone can say Y here.
+
+CONFIG_X86_MCE_NONFATAL
+ Enabling this feature starts a timer that triggers every 5 seconds which
+ will look at the machine check registers to see if anything happened.
+ Non-fatal problems automatically get corrected (but still logged).
+ Disable this if you don't want to see these messages.
+ Seeing the messages this option prints out may be indicative of dying hardware,
+ or out-of-spec (i.e., overclocked) hardware.
+ This option only does something on hardware with Intel P6 style MCE.
+ (Pentium Pro and above, AMD Athlon/Duron)
+
CONFIG_TOSHIBA
This adds a driver to safely access the System Management Mode of
the CPU on Toshiba portables with a genuine Toshiba BIOS. It does
of the BUG call as well as the EIP and oops trace. This aids
debugging but costs about 70-100K of memory.
+CONFIG_DEBUG_OBSOLETE
+ Say Y here if you want to reduce the chances of the tree compiling,
+ and are prepared to dig into driver internals to fix compile errors.
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/config.h>
+#include <linux/irq.h>
#include <asm/processor.h>
+#include <asm/system.h>
#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_X86_MCE
static int mce_disabled __initdata = 0;
+static int banks;
+
/*
- * Machine Check Handler For PII/PIII
+ * If we get an MCE, we don't know what state the caches/TLB's are
+ * going to be in, so we throw them all away.
*/
+/* Discard caches and TLBs: after an MCE their contents cannot be
+ * trusted. NOTE(review): "invd" invalidates WITHOUT writeback, so any
+ * dirty cache lines are deliberately thrown away here. */
+static void inline flush_all (void)
+{
+	__asm__ __volatile__ ("invd": : );
+	__flush_tlb();
+}
-static int banks;
+/*
+ * P4/Xeon Thermal transition interrupt handler
+ */
+
+/* P4/Xeon thermal transition handler: reports whether the CPU has
+ * crossed its thermal threshold (and is now clock-modulating) or has
+ * returned to normal operation. Requires the local APIC. */
+static void intel_thermal_interrupt(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	u32 l, h;
+	unsigned int cpu = smp_processor_id();
+
+	ack_APIC_irq();
+
+	/* bit 0 of THERM_STATUS: thermal trip currently asserted */
+	rdmsr(MSR_IA32_THERM_STATUS, l, h);
+	if (l & 1) {
+		printk(KERN_EMERG "CPU#%d: Temperature above threshold\n", cpu);
+		printk(KERN_EMERG "CPU#%d: Running in modulated clock mode\n", cpu);
+	} else {
+		printk(KERN_INFO "CPU#%d: Temperature/speed normal\n", cpu);
+	}
+#endif
+}
+
+/* Default handler: fires only if the thermal LVT delivers an interrupt
+ * before intel_init_thermal() installed the real handler. */
+static void unexpected_thermal_interrupt(struct pt_regs *regs)
+{
+	printk(KERN_ERR "CPU#%d: Unexpected LVT TMR interrupt!\n", smp_processor_id());
+}
+
+/*
+ * Thermal interrupt handler for this CPU setup
+ */
+
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+
+/*
+ * Entry point from the thermal interrupt vector; dispatches to the
+ * handler installed for this CPU (APIC ACK is done by that handler).
+ * Fix: "&regs" had been mangled to the mojibake "(R)s" sequence,
+ * which is not valid C.
+ */
+asmlinkage void smp_thermal_interrupt(struct pt_regs regs)
+{
+	vendor_thermal_interrupt(&regs);
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+
+/* P4/Xeon Thermal regulation detect and init.
+ *
+ * Enables the thermal monitor (IA32_MISC_ENABLE bit 3), installs the
+ * thermal interrupt handler on the LVT thermal entry, and unmasks it.
+ * Silently bails out if the CPU lacks the feature, if SMM already owns
+ * thermal handling, or if an LVT vector is already programmed. */
+
+static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	u32 l, h;
+	unsigned int cpu = smp_processor_id();
+
+	/* Thermal monitoring */
+	if (!test_bit(X86_FEATURE_ACPI, &c->x86_capability))
+		return; /* -ENODEV */
+
+	/* Clock modulation */
+	if (!test_bit(X86_FEATURE_ACC, &c->x86_capability))
+		return; /* -ENODEV */
+
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	/* first check if it's enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already -zwanem.
+	 */
+
+	if (l & (1<<3)) {
+		printk(KERN_DEBUG "CPU#%d: Thermal monitoring already enabled\n", cpu);
+	} else {
+		wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+		printk(KERN_INFO "CPU#%d: Thermal monitoring enabled\n", cpu);
+	}
+
+	/* check whether a vector already exists */
+	l = apic_read(APIC_LVTTHMR);
+	if (l & 0xff) {
+		printk(KERN_DEBUG "CPU#%d: Thermal LVT already handled\n", cpu);
+		return; /* -EBUSY */
+	}
+
+	/* Fix: a duplicated wrmsr(MSR_IA32_MISC_ENABLE, ...) was here; at
+	 * this point 'l' holds the APIC_LVTTHMR value read above, so that
+	 * write corrupted IA32_MISC_ENABLE. The enable was already done
+	 * (correctly) just above. */
+
+	/* The temperature transition interrupt handler setup */
+	l = THERMAL_APIC_VECTOR; /* our delivery vector */
+	l |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
+	apic_write_around(APIC_LVTTHMR, l);
+
+	/* unmask the two thermal threshold interrupt enables */
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x3 , h);
+
+	/* ok we're good to go... */
+	vendor_thermal_interrupt = intel_thermal_interrupt;
+	l = apic_read(APIC_LVTTHMR);
+	apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+	return;
+#endif
+}
+
+/*
+ * Machine Check Handler For PII/PIII
+ */
static void intel_machine_check(struct pt_regs * regs, long error_code)
{
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int i;
-
+
+ flush_all();
+
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if(mcgstl&(1<<0)) /* Recoverable ? */
recover=0;
if(high&(1<<27))
{
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk("[%08x%08x]", alow, ahigh);
+ printk("[%08x%08x]", ahigh, alow);
}
if(high&(1<<26))
{
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk(" at %08x%08x",
- ahigh, alow);
+ printk(" at %08x%08x", ahigh, alow);
}
printk("\n");
/* Clear it */
machine_check_vector(regs, error_code);
}
+
+#ifdef CONFIG_X86_MCE_NONFATAL
+struct timer_list mce_timer;
+
+/*
+ * Poll the MC status banks of the CPU this runs on; log and scrub any
+ * non-fatal (corrected) event so it is not reported again next sweep.
+ *
+ * Signature takes 'void *' so it can be handed directly to
+ * smp_call_function(); the argument is unused.
+ *
+ * Fixes vs. the previous version: the timer re-arm has moved to
+ * mce_timerfunc() — re-arming here ran add_timer() once per CPU on the
+ * single shared (already pending) mce_timer, which is a bug — and the
+ * cpu-id BUG() check is gone since it fired on every remote CPU.
+ */
+static void mce_checkregs (void *info)
+{
+	u32 low, high;
+	int i;
+
+	for (i=0; i<banks; i++) {
+		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
+
+		if ((low | high) != 0) {
+			flush_all();
+			printk (KERN_EMERG "MCE: The hardware reports a non fatal, correctable incident occurred on CPU %d.\n", smp_processor_id());
+			printk (KERN_EMERG "Bank %d: %08x%08x\n", i, high, low);
+
+			/* Scrub the error so we don't pick it up in 5 seconds time. */
+			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+			/* Serialize */
+			wmb();
+		}
+	}
+}
+
+/* Timer callback: check this CPU, ask all others to check theirs,
+ * then re-arm the poll timer exactly once. */
+static void mce_timerfunc (unsigned long data)
+{
+	mce_checkregs(NULL);
+#ifdef CONFIG_SMP
+	/* Fix: previously passed an int where smp_call_function() expects
+	 * a 'void *' argument and a 'void (*)(void *)' handler. */
+	smp_call_function (mce_checkregs, NULL, 1, 1);
+#endif
+
+	/* Refresh the timer. */
+	mce_timer.expires = jiffies + 5 * HZ;
+	add_timer (&mce_timer);
+}
+#endif
+
+
/*
- * Set up machine check reporting for Intel processors
+ * Set up machine check reporting for processors with Intel style MCE
*/
static void __init intel_mcheck_init(struct cpuinfo_x86 *c)
if(done==0)
printk(KERN_INFO "Intel machine check architecture supported.\n");
rdmsr(MSR_IA32_MCG_CAP, l, h);
- if(l&(1<<8))
+ if(l&(1<<8)) /* Control register present ? */
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
banks = l&0xff;
- for(i=1;i<banks;i++)
- {
- wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+
+ /* Don't enable bank 0 on intel P6 cores, it goes bang quickly. */
+ if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6) {
+ for(i=1; i<banks; i++)
+ wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+ } else {
+ for(i=0; i<banks; i++)
+ wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
}
- for(i=0;i<banks;i++)
- {
+
+ for(i=0; i<banks; i++)
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
- }
+
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+
+ intel_init_thermal(c);
+
done=1;
}
* This has to be run for each processor
*/
-
-
void __init mcheck_init(struct cpuinfo_x86 *c)
{
if(mce_disabled==1)
return;
-
+
switch(c->x86_vendor)
{
case X86_VENDOR_AMD:
- /*
- * AMD K7 machine check is Intel like
- */
- if(c->x86 == 6)
+ /* AMD K7 machine check is Intel like */
+ if(c->x86 == 6) {
intel_mcheck_init(c);
+#ifdef CONFIG_X86_MCE_NONFATAL
+ /* Set the timer to check for non-fatal errors every 5 seconds */
+ init_timer (&mce_timer);
+ mce_timer.expires = jiffies + 5 * HZ;
+ mce_timer.data = 0;
+ mce_timer.function = &mce_timerfunc;
+ add_timer (&mce_timer);
+#endif
+ }
break;
+
case X86_VENDOR_INTEL:
intel_mcheck_init(c);
break;
+
case X86_VENDOR_CENTAUR:
winchip_mcheck_init(c);
break;
+
default:
break;
}
__setup("nomce", mcheck_disable);
__setup("mce", mcheck_enable);
+
+#else
+asmlinkage void do_machine_check(struct pt_regs * regs, long error_code) {}
+asmlinkage void smp_thermal_interrupt(struct pt_regs regs) {}
+void __init mcheck_init(struct cpuinfo_x86 *c) {}
+#endif