* Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
* Maciej W. Rozycki : Bits for genuine 82489DX APICs
* Andi Kleen : Changed for SMP boot into long mode.
+ * Rusty Russell : Hacked into shape for new "hotplug" boot process.
*/
#include <linux/config.h>
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
-/* Setup configured maximum number of CPUs to activate */
-static int max_cpus = -1;
-
-/* Total count of live CPUs */
-int smp_num_cpus = 1;
-
/* Bitmask of currently online CPUs */
unsigned long cpu_online_map;
volatile int x86_cpu_to_apicid[NR_CPUS];
static volatile unsigned long cpu_callin_map;
-static volatile unsigned long cpu_callout_map;
+volatile unsigned long cpu_callout_map;
+static unsigned long smp_commenced_mask;
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
/* Set when the idlers are all forked */
int smp_threads_ready;
-/*
- * Setup routine for controlling SMP activation
- *
- * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
- * activation entirely (the MPS table probe still happens, though).
- *
- * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
- * greater than 0, limits the maximum number of CPUs activated in
- * SMP mode to <NUM>.
- */
-
-static int __init nosmp(char *str)
-{
- max_cpus = 0;
- return 1;
-}
-
-__setup("nosmp", nosmp);
-
-static int __init maxcpus(char *str)
-{
- get_option(&str, &max_cpus);
- return 1;
-}
-
-__setup("maxcpus=", maxcpus);
-
/*
* Trampoline 80x86 program as an array.
*/
static unsigned long __init setup_trampoline(void)
{
extern volatile __u32 tramp_gdt_ptr;
- tramp_gdt_ptr = __pa_symbol(&gdt_table);
+ tramp_gdt_ptr = __pa_symbol(&cpu_gdt_table);
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(trampoline_base);
}
* a given CPU
*/
-void __init smp_store_cpu_info(int id)
+static void __init smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = cpu_data + id;
identify_cpu(c);
}
-/*
- * Architecture specific routine called by the kernel just before init is
- * fired off. This allows the BP to have everything in order [we hope].
- * At the end of this all the APs will hit the system scheduling and off
- * we go. Each AP will load the system gdt's and jump through the kernel
- * init into idle(). At this point the scheduler will one day take over
- * and give them jobs to do. smp_callin is a standard routine
- * we use to track CPUs as they power up.
- */
-
-static atomic_t smp_commenced = ATOMIC_INIT(0);
-
-void __init smp_commence(void)
-{
- /*
- * Lets the callins below out of their loop.
- */
- Dprintk("Setting commenced=1, go go go\n");
-
- wmb();
- atomic_set(&smp_commenced,1);
-}
-
/*
* TSC synchronization.
*
extern unsigned int fast_gettimeoffset_quotient;
-static inline unsigned long long div64 (unsigned long long a, unsigned long b)
-{
- return a/b;
-}
-
static void __init synchronize_tsc_bp (void)
{
int i;
long long delta;
unsigned long one_usec;
int buggy = 0;
+ extern unsigned cpu_khz;
- printk("checking TSC synchronization across CPUs: ");
+ printk("checking TSC synchronization across %u CPUs: ",num_booting_cpus());
- one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2);
+ one_usec = cpu_khz;
atomic_set(&tsc_start_flag, 1);
wmb();
/*
* all APs synchronize but they loop on '== num_cpus'
*/
- while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb();
atomic_set(&tsc_count_stop, 0);
wmb();
/*
/*
* Wait for all APs to leave the synchronization point:
*/
- while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb();
atomic_set(&tsc_count_start, 0);
wmb();
atomic_inc(&tsc_count_stop);
}
sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (test_bit(i, &cpu_callout_map)) {
t0 = tsc_values[i];
sum += t0;
}
- avg = div64(sum, smp_num_cpus);
+ }
+ avg = sum / num_booting_cpus();
sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!test_bit(i, &cpu_callout_map))
+ continue;
+
delta = tsc_values[i] - avg;
if (delta < 0)
delta = -delta;
buggy = 1;
printk("\n");
}
- realdelta = div64(delta, one_usec);
+ realdelta = delta / one_usec;
if (tsc_values[i] < avg)
realdelta = -realdelta;
int i;
/*
- * smp_num_cpus is not necessarily known at the time
+ * Not every cpu is online at the time
* this gets called, so we first wait for the BP to
* finish SMP initialization:
*/
for (i = 0; i < NR_LOOPS; i++) {
atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb();
rdtscll(tsc_values[smp_processor_id()]);
if (i == NR_LOOPS-1)
write_tsc(0, 0);
atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
}
}
#undef NR_LOOPS
*/
phys_id = GET_APIC_ID(apic_read(APIC_ID));
cpuid = smp_processor_id();
- if (test_and_set_bit(cpuid, &cpu_online_map)) {
+ if (test_and_set_bit(cpuid, &cpu_callin_map)) {
printk("huh, phys CPU#%d, CPU#%d already present??\n",
phys_id, cpuid);
BUG();
Dprintk("CALLIN, before setup_local_APIC().\n");
setup_local_APIC();
- sti();
+ local_irq_enable();
#ifdef CONFIG_MTRR
/*
calibrate_delay();
Dprintk("Stack at about %p\n",&cpuid);
+ disable_APIC_timer();
+
/*
* Save our processor parameters
*/
*/
int __init start_secondary(void *unused)
{
- int var;
- printk("rsp %p\n",&var);
-
/*
* Dont put anything before smp_callin(), SMP
* booting is too fragile that we want to limit the
*/
cpu_init();
smp_callin();
- while (!atomic_read(&smp_commenced))
+
+ Dprintk("cpu %d: waiting for commence\n", smp_processor_id());
+ while (!test_bit(smp_processor_id(), &smp_commenced_mask))
rep_nop();
+
+ Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
+ setup_secondary_APIC_clock();
+
+ Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
+
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ enable_NMI_through_LVT0(NULL);
+ enable_8259A_irq(0);
+ }
+
+
+ enable_APIC_timer();
+
/*
* low-memory mappings have been cleared, flush them from
* the local TLBs too.
*/
local_flush_tlb();
+ Dprintk("cpu %d eSetting cpu_online_map\n", smp_processor_id());
+ set_bit(smp_processor_id(), &cpu_online_map);
+ wmb();
+
return cpu_idle();
}
* don't care about the rip and regs settings since
* we'll never reschedule the forked task.
*/
- return do_fork(CLONE_VM|CLONE_IDLETASK, 0, ®s, 0);
+ return do_fork(CLONE_VM|CLONE_IDLETASK, 0, ®s, 0, NULL);
}
#if APIC_DEBUG
}
#endif
-static void __init do_boot_cpu (int apicid)
+static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
- struct task_struct *idle;
- unsigned long send_status, accept_status, boot_status, maxlvt;
- int timeout, num_starts, j, cpu;
- unsigned long start_eip;
-
- printk("do_boot_cpu cpucount = %d\n", cpucount);
-
- cpu = ++cpucount;
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- idle = fork_by_hand();
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
-
- /*
- * We remove it from the pidhash and the runqueue
- * once we got the process:
- */
- init_idle(idle,cpu);
-
- x86_cpu_to_apicid[cpu] = apicid;
- x86_apicid_to_cpu[apicid] = cpu;
- idle->thread.rip = (unsigned long)start_secondary;
-// idle->thread.rsp = (unsigned long)idle->thread_info + THREAD_SIZE - 512;
-
- unhash_process(idle);
-
- cpu_pda[cpu].pcurrent = idle;
-
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
-
- init_rsp = (unsigned long)idle->thread_info + PAGE_SIZE + 1024;
- initial_code = initialize_secondary;
-
- printk("Booting processor %d/%d rip %lx rsp %lx rsp2 %lx\n", cpu, apicid,
- start_eip, idle->thread.rsp, init_rsp);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
- Dprintk("3.\n");
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if (APIC_INTEGRATED(apic_version[apicid])) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- /*
- * Status is now clean
- */
- send_status = 0;
- accept_status = 0;
- boot_status = 0;
-
- /*
- * Starting actual IPI sequence...
- */
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
Dprintk("Asserting INIT.\n");
/*
* Turn INIT on target chip
*/
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/*
* Send IPI
Dprintk("Deasserting INIT.\n");
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Send IPI */
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
* Should we send STARTUP IPIs ?
*
* Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't
- * send the STARTUP IPIs.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
*/
- if (APIC_INTEGRATED(apic_version[apicid]))
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
num_starts = 2;
else
num_starts = 0;
*/
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Boot on the stack */
/* Kick the second */
apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
+ | (start_rip >> 12));
/*
* Give the other CPU some time to accept the IPI.
if (accept_status)
printk("APIC delivery error (%lx).\n", accept_status);
- if (!send_status && !accept_status) {
+ return (send_status | accept_status);
+}
+
+extern unsigned long cpu_initialized;
+
+static void __init do_boot_cpu (int apicid)
+{
+ struct task_struct *idle;
+ unsigned long boot_error;
+ int timeout, cpu;
+ unsigned long start_rip;
+
+ cpu = ++cpucount;
+ /*
+ * We can't use kernel_thread since we must avoid to
+ * reschedule the child.
+ */
+ idle = fork_by_hand();
+ if (IS_ERR(idle))
+ panic("failed fork for CPU %d", cpu);
+
+ /*
+ * We remove it from the pidhash and the runqueue
+ * once we got the process:
+ */
+ init_idle(idle,cpu);
+
+ x86_cpu_to_apicid[cpu] = apicid;
+ x86_apicid_to_cpu[apicid] = cpu;
+ idle->thread.rip = (unsigned long)start_secondary;
+// idle->thread.rsp = (unsigned long)idle->thread_info + THREAD_SIZE - 512;
+
+ unhash_process(idle);
+
+ cpu_pda[cpu].pcurrent = idle;
+
+ /* start_rip had better be page-aligned! */
+ start_rip = setup_trampoline();
+
+ init_rsp = (unsigned long)idle->thread_info + PAGE_SIZE + 1024;
+ init_tss[cpu].rsp0 = init_rsp;
+ initial_code = initialize_secondary;
+
+ printk("Booting processor %d/%d rip %lx rsp %lx rsp2 %lx\n", cpu, apicid,
+ start_rip, idle->thread.rsp, init_rsp);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
+ Dprintk("3.\n");
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ /*
+ * Status is now clean
+ */
+ boot_error = 0;
+
+ /*
+ * Starting actual IPI sequence...
+ */
+ boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
+
+ if (!boot_error) {
/*
* allow APs to start initializing.
*/
print_cpu_info(&cpu_data[cpu]);
Dprintk("CPU has booted.\n");
} else {
- boot_status = 1;
+ boot_error = 1;
if (*((volatile unsigned char *)phys_to_virt(8192))
== 0xA5)
/* trampoline started but...? */
#endif
}
}
- if (send_status || accept_status || boot_status) {
+ if (boot_error) {
x86_cpu_to_apicid[cpu] = -1;
x86_apicid_to_cpu[apicid] = -1;
+ clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
+ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
cpucount--;
}
/* mark "stuck" area as not stuck */
- *((volatile unsigned long *)phys_to_virt(8192)) = 0;
+ *((volatile unsigned *)phys_to_virt(8192)) = 0;
}
cycles_t cacheflush_time;
extern int prof_old_multiplier[NR_CPUS];
extern int prof_counter[NR_CPUS];
-void __init smp_boot_cpus(void)
+static void __init smp_boot_cpus(unsigned int max_cpus)
{
int apicid, cpu;
* We have the boot CPU online for sure.
*/
set_bit(0, &cpu_online_map);
+ set_bit(0, &cpu_callout_map);
x86_apicid_to_cpu[boot_cpu_id] = 0;
x86_cpu_to_apicid[0] = boot_cpu_id;
- global_irq_holder = NO_PROC_ID;
current_thread_info()->cpu = 0;
smp_tune_scheduling();
+ if (!test_bit(hard_smp_processor_id(), &phys_cpu_present_map)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ hard_smp_processor_id());
+ phys_cpu_present_map |= (1 << hard_smp_processor_id());
+ }
+
/*
* If we couldnt find an SMP configuration at boot time,
* get out of here now!
printk(KERN_NOTICE "SMP motherboard not detected.\n");
io_apic_irqs = 0;
cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
+ phys_cpu_present_map = 1;
if (APIC_init_uniprocessor())
printk(KERN_NOTICE "Local APIC not detected."
" Using dummy APIC emulation.\n");
- goto smp_done;
+ return;
}
/*
printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
io_apic_irqs = 0;
cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
+ phys_cpu_present_map = 1;
+ return;
}
verify_local_APIC();
printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
io_apic_irqs = 0;
cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
+ phys_cpu_present_map = 1;
+ return;
}
connect_bsp_APIC();
*/
CMOS_WRITE(0, 0xf);
- *((volatile long *) phys_to_virt(0x467)) = 0;
+ *((volatile int *) phys_to_virt(0x467)) = 0;
}
/*
} else {
unsigned long bogosum = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (cpu_online_map & (1<<cpu))
+ if (cpu_callout_map & (1<<cpu))
bogosum += cpu_data[cpu].loops_per_jiffy;
printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
cpucount+1,
(bogosum/(5000/HZ))%100);
Dprintk("Before bogocount - setting activated=1.\n");
}
- smp_num_cpus = cpucount + 1;
Dprintk("Boot done.\n");
if (!skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
- /*
- * Set up all local APIC timers in the system:
- */
- setup_APIC_clocks();
+ setup_boot_APIC_clock();
/*
* Synchronize the TSC with the AP
*/
if (cpu_has_tsc && cpucount)
synchronize_tsc_bp();
+}
+
+/* These are wrappers to interface to the new boot process. Someone
+ who understands all this stuff should rewrite it properly. --RR 15/Jul/02
+ smp_prepare_cpus() itself only forwards max_cpus to smp_boot_cpus(). */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_boot_cpus(max_cpus);
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+ /* This only works at boot for x86. See "rewrite" above.
+    A cpu whose bit is already set in smp_commenced_mask gets -ENOSYS. */
+ if (test_bit(cpu, &smp_commenced_mask))
+ return -ENOSYS;
-smp_done:
+ /* In case one didn't come up: its bit is missing from cpu_callin_map, so report -EIO */
+ if (!test_bit(cpu, &cpu_callin_map))
+ return -EIO;
+
+ /* Unleash the CPU! Setting its smp_commenced_mask bit ends the wait loop
+    in start_secondary(); then spin until its cpu_online_map bit is set. */
+ Dprintk("waiting for cpu %d\n", cpu);
+
+ set_bit(cpu, &smp_commenced_mask);
+ while (!test_bit(cpu, &cpu_online_map))
+ mb();
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{ /* max_cpus is unused; the only step taken here is zap_low_mappings() */
zap_low_mappings();
}
+