enum ipi_message_type {
IPI_RESCHEDULE,
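+ /* Pull the task deposited in migration_task onto this CPU. */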
+ IPI_MIGRATION,
IPI_CALL_FUNC,
IPI_CPU_STOP,
};
spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
/* Set to a secondary's cpuid when it comes online. */
-static unsigned long smp_secondary_alive;
+static int smp_secondary_alive __initdata = 0;
/* Which cpu ids came online. */
unsigned long cpu_present_mask;
int smp_num_cpus = 1; /* Number that came online. */
int smp_threads_ready; /* True once the per process idle is forked. */
cycles_t cacheflush_time;
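+/* How long, in ticks, a freshly migrated task's cache footprint is
+   assumed to stay warm; consumed by the scheduler's migration
+   heuristics and computed in smp_tune_scheduling below. */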
+unsigned long cache_decay_ticks;
int __cpu_number_map[NR_CPUS];
int __cpu_logical_map[NR_CPUS];
{
int cpuid = hard_smp_processor_id();
- if (current != init_tasks[cpu_number_map(cpuid)]) {
- printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n",
- cpuid, current, init_tasks[cpu_number_map(cpuid)]);
- }
-
- DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state));
-
/* Turn on machine checks. */
wrmces(7);
/* Get our local ticker going. */
smp_setup_percpu_timer(cpuid);
+ /* All kernel threads share the same mm context. */
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+
/* Must have completely accurate bogos. */
__sti();
- /*
- * Wait boot CPU to stop with irq enabled before
- * running calibrate_delay().
- */
+ /* Wait for the boot CPU to stop with irqs enabled before running
+    calibrate_delay. */
wait_boot_cpu_to_stop(cpuid);
mb();
calibrate_delay();
smp_store_cpu_info(cpuid);
- /*
- * Allow master to continue only after we written
- * the loops_per_jiffy.
- */
+ /* Allow master to continue only after we've written loops_per_jiffy. */
wmb();
smp_secondary_alive = 1;
while (!smp_threads_ready)
barrier();
- DBGS(("smp_callin: commencing CPU %d current %p\n",
- cpuid, current));
-
- /* Setup the scheduler for this processor. */
- init_idle();
+ DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
+ cpuid, current, current->active_mm));
- /* ??? This should be in init_idle. */
- atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
/* Do nothing. */
cpu_idle();
}
smp_tune_scheduling (int cpuid)
{
struct percpu_struct *cpu;
- unsigned long on_chip_cache;
- unsigned long freq;
+ unsigned long on_chip_cache; /* kB */
+ unsigned long freq; /* Hz */
+ unsigned long bandwidth = 350; /* MB/s */
cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset
+ cpuid * hwrpb->processor_size);
case EV6_CPU:
case EV67_CPU:
- on_chip_cache = 64 + 64;
- break;
-
default:
- on_chip_cache = 8 + 8;
+ on_chip_cache = 64 + 64;
break;
}
freq = hwrpb->cycle_freq ? : est_cycle_freq;
-#if 0
- /* Magic estimation stolen from x86 port. */
- cacheflush_time = freq / 1024L * on_chip_cache / 5000L;
+ cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
+ cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;
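+ /* Units: freq/1000000 is cycles/usec; on_chip_cache << 10 is bytes;
+    bandwidth (an assumed 350 MB/s, i.e. bytes/usec, not a measured
+    figure) turns that into usecs, so cacheflush_time is in cycles.
+    Cycles / (freq/1000) gives msecs; * HZ / 1000 gives ticks. */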
- printk("Using heuristic of %d cycles.\n",
- cacheflush_time);
-#else
- /* Magic value to force potential preemption of other CPUs. */
- cacheflush_time = INT_MAX;
+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+ cacheflush_time/(freq/1000000),
+ (cacheflush_time*100/(freq/1000000)) % 100);
+ printk("task migration cache decay timeout: %ld msecs.\n",
+ (cache_decay_ticks + 1) * 1000 / HZ);
+}
- printk("Using heuristic of %d cycles.\n",
- cacheflush_time);
-#endif
+/* Wait until hwrpb->txrdy is clear for cpumask. Return -1 on timeout. */
+static int __init
+wait_for_txrdy (unsigned long cpumask)
+{
+ unsigned long timeout;
+
+ if (!(hwrpb->txrdy & cpumask))
+ return 0;
+
+ timeout = jiffies + 10*HZ;
+ while (time_before(jiffies, timeout)) {
+ if (!(hwrpb->txrdy & cpumask))
+ return 0;
+ udelay(10);
+ barrier();
+ }
+
+ return -1;
}
/*
* Send a message to a secondary's console. "START" is one such
* interesting message. ;-)
*/
-static void
+static void __init
send_secondary_console_msg(char *str, int cpuid)
{
struct percpu_struct *cpu;
register char *cp1, *cp2;
unsigned long cpumask;
size_t len;
- long timeout;
cpu = (struct percpu_struct *)
((char*)hwrpb
+ hwrpb->processor_offset
+ cpuid * hwrpb->processor_size);
cpumask = (1UL << cpuid);
- if (hwrpb->txrdy & cpumask)
- goto delay1;
- ready1:
+ if (wait_for_txrdy(cpumask))
+ goto timeout;
cp2 = str;
len = strlen(cp2);
wmb();
set_bit(cpuid, &hwrpb->rxrdy);
- if (hwrpb->txrdy & cpumask)
- goto delay2;
- ready2:
+ if (wait_for_txrdy(cpumask))
+ goto timeout;
return;
-delay1:
- /* Wait 10 seconds. Note that jiffies aren't ticking yet. */
- for (timeout = 1000000; timeout > 0; --timeout) {
- if (!(hwrpb->txrdy & cpumask))
- goto ready1;
- udelay(10);
- barrier();
- }
- goto timeout;
-
-delay2:
- /* Wait 10 seconds. */
- for (timeout = 1000000; timeout > 0; --timeout) {
- if (!(hwrpb->txrdy & cpumask))
- goto ready2;
- udelay(10);
- barrier();
- }
- goto timeout;
-
-timeout:
+ timeout:
printk("Processor %x not ready\n", cpuid);
- return;
}
/*
secondary_cpu_start(int cpuid, struct task_struct *idle)
{
struct percpu_struct *cpu;
- struct pcb_struct *hwpcb;
+ struct pcb_struct *hwpcb, *ipcb;
long timeout;
cpu = (struct percpu_struct *)
+ hwrpb->processor_offset
+ cpuid * hwrpb->processor_size);
hwpcb = (struct pcb_struct *) cpu->hwpcb;
+ ipcb = &idle->thread_info->pcb;
/* Initialize the CPU's HWPCB to something just good enough for
us to get started. Immediately after starting, we'll swpctx
- to the target idle task's ptb. Reuse the stack in the mean
+ to the target idle task's pcb. Reuse the stack in the mean
time. Precalculate the target PCBB. */
- hwpcb->ksp = (unsigned long) idle + sizeof(union task_union) - 16;
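+ /* The pcb is the first member of thread_info, which sits at the
+    bottom of the stack union, so ipcb doubles as the stack base. */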
+ hwpcb->ksp = (unsigned long)ipcb + sizeof(union thread_union) - 16;
hwpcb->usp = 0;
- hwpcb->ptbr = idle->thread.ptbr;
+ hwpcb->ptbr = ipcb->ptbr;
hwpcb->pcc = 0;
hwpcb->asn = 0;
- hwpcb->unique = virt_to_phys(&idle->thread);
- hwpcb->flags = idle->thread.pal_flags;
+ hwpcb->unique = virt_to_phys(ipcb);
+ hwpcb->flags = ipcb->flags;
hwpcb->res1 = hwpcb->res2 = 0;
#if 0
DBGS(("KSP 0x%lx PTBR 0x%lx VPTBR 0x%lx UNIQUE 0x%lx\n",
hwpcb->ksp, hwpcb->ptbr, hwrpb->vptb, hwpcb->unique));
#endif
DBGS(("Starting secondary cpu %d: state 0x%lx pal_flags 0x%lx\n",
- cpuid, idle->state, idle->thread.pal_flags));
+ cpuid, idle->state, ipcb->flags));
/* Setup HWRPB fields that SRM uses to activate secondary CPU */
hwrpb->CPU_restart = __smp_callin;
send_secondary_console_msg("START\r\n", cpuid);
- /* Wait 10 seconds for an ACK from the console. Note that jiffies
- aren't ticking yet. */
- for (timeout = 1000000; timeout > 0; timeout--) {
+ /* Wait 10 seconds for an ACK from the console. */
+ timeout = jiffies + 10*HZ;
+ while (time_before(jiffies, timeout)) {
if (cpu->flags & 1)
goto started;
udelay(10);
printk(KERN_ERR "SMP: Processor %d failed to start.\n", cpuid);
return -1;
-started:
+ started:
DBGS(("secondary_cpu_start: SUCCESS for CPU %d!!!\n", cpuid));
return 0;
}
-static int __init fork_by_hand(void)
+static int __init
+fork_by_hand(void)
{
+ /* Don't care about the contents of regs since we'll never
+ reschedule the forked task. */
struct pt_regs regs;
- /*
- * don't care about the regs settings since
- * we'll never reschedule the forked task.
- */
return do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0);
}
struct task_struct *idle;
long timeout;
- /* Cook up an idler for this guy. Note that the address we give
- to kernel_thread is irrelevant -- it's going to start where
- HWRPB.CPU_restart says to start. But this gets all the other
- task-y sort of data structures set up like we wish. */
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
+ /* Cook up an idler for this guy. Note that the address we
+ give to kernel_thread is irrelevant -- it's going to start
+ where HWRPB.CPU_restart says to start. But this gets all
+ the other task-y sort of data structures set up like we
+ wish. We can't use kernel_thread since we must avoid
+ rescheduling the child. */
if (fork_by_hand() < 0)
panic("failed fork for CPU %d", cpuid);
idle = init_task.prev_task;
if (!idle)
panic("No idle process for CPU %d", cpuid);
- if (idle == &init_task)
- panic("idle process is init_task for CPU %d", cpuid);
- idle->processor = cpuid;
- idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */
+ init_idle(idle, cpuid);
+ unhash_process(idle);
+
__cpu_logical_map[cpunum] = cpuid;
__cpu_number_map[cpuid] = cpunum;
-
- del_from_runqueue(idle);
- unhash_process(idle);
- init_tasks[cpunum] = idle;
DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n",
cpuid, idle->state, idle->flags));
- /* The secondary will change this once it is happy. Note that
- secondary_cpu_start contains the necessary memory barrier. */
+ /* Signal the secondary to wait a moment. */
smp_secondary_alive = -1;
/* Whirrr, whirrr, whirrrrrrrrr... */
if (secondary_cpu_start(cpuid, idle))
return -1;
+ /* Notify the secondary CPU it can run calibrate_delay. */
mb();
- /* Notify the secondary CPU it can run calibrate_delay() */
smp_secondary_alive = 0;
- /* We've been acked by the console; wait one second for the task
- to start up for real. Note that jiffies aren't ticking yet. */
- for (timeout = 0; timeout < 1000000; timeout++) {
+ /* We've been acked by the console; wait one second for
+ the task to start up for real. */
+ timeout = jiffies + 1*HZ;
+ while (time_before(jiffies, timeout)) {
if (smp_secondary_alive == 1)
goto alive;
udelay(10);
barrier();
}
- /* we must invalidate our stuff as we failed to boot the CPU */
+ /* We must invalidate our stuff as we failed to boot the CPU. */
__cpu_logical_map[cpunum] = -1;
__cpu_number_map[cpuid] = -1;
- /* the idle task is local to us so free it as we don't use it */
- free_task_struct(idle);
-
printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid);
return -1;
-alive:
+ alive:
/* Another "Red Snapper". */
return 0;
}
__cpu_number_map[boot_cpuid] = 0;
__cpu_logical_map[0] = boot_cpuid;
- current->processor = boot_cpuid;
+ current_thread_info()->cpu = boot_cpuid;
smp_store_cpu_info(boot_cpuid);
smp_tune_scheduling(boot_cpuid);
smp_setup_percpu_timer(boot_cpuid);
- init_idle();
-
- /* ??? This should be in init_idle. */
- atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
-
/* Nothing to do on a UP box, or when told not to. */
if (smp_num_probed == 1 || max_cpus == 0) {
+ cpu_present_mask = 1UL << boot_cpuid;
printk(KERN_INFO "SMP mode deactivated.\n");
return;
}
static void
send_ipi_message(unsigned long to_whom, enum ipi_message_type operation)
{
- long i, j;
-
- /* Reduce the number of memory barriers by doing two loops,
- one to set the bits, one to invoke the interrupts. */
-
- mb(); /* Order out-of-band data and bit setting. */
-
- for (i = 0, j = 1; i < NR_CPUS; ++i, j <<= 1) {
- if (to_whom & j)
- set_bit(operation, &ipi_data[i].bits);
- }
-
- mb(); /* Order bit setting and interrupt. */
+ unsigned long i, set, n;
+
+ set = to_whom & -to_whom;
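+ /* The common case is a single target CPU; handle it without looping. */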
+ if (to_whom == set) {
+ n = __ffs(set);
+ mb();
+ set_bit(operation, &ipi_data[n].bits);
+ mb();
+ wripir(n);
+ } else {
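+ /* Reduce the number of memory barriers by doing two loops,
+    one to set the bits, one to invoke the interrupts. */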
+ mb();
+ for (i = to_whom; i ; i &= ~set) {
+ set = i & -i;
+ n = __ffs(set);
+ set_bit(operation, &ipi_data[n].bits);
+ }
- for (i = 0, j = 1; i < NR_CPUS; ++i, j <<= 1) {
- if (to_whom & j)
- wripir(i);
+ mb();
+ for (i = to_whom; i ; i &= ~set) {
+ set = i & -i;
+ n = __ffs(set);
+ wripir(n);
+ }
}
}
+/* Data for IPI_MIGRATION. */
+static task_t *migration_task;
+
/* Structure and data for smp_call_function. This is designed to
minimize static memory requirements. Plus it looks cleaner. */
/* Atomically drop data into a shared pointer. The pointer is free if
it is initially locked. If retry, spin until free. */
-static inline int
+static int
pointer_lock (void *lock, void *data, int retry)
{
void *old, *tmp;
mb();
-again:
+ again:
/* Compare and swap with zero. */
asm volatile (
"1: ldq_l %0,%1\n"
which = ops & -ops;
ops &= ~which;
- which = ffz(~which);
+ which = __ffs(which);
- if (which == IPI_RESCHEDULE) {
+ switch (which) {
+ case IPI_RESCHEDULE:
/* Reschedule callback. Everything to be done
is done by the interrupt return path. */
- }
- else if (which == IPI_CALL_FUNC) {
+ break;
+
+ case IPI_MIGRATION:
+ {
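+ /* Consume the task and clear the slot, releasing it for the next
+    smp_migrate_task sender before entering the scheduler. */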
+ task_t *t = migration_task;
+ mb();
+ migration_task = 0;
+ sched_task_migrated(t);
+ break;
+ }
+
+ case IPI_CALL_FUNC:
+ {
struct smp_call_struct *data;
void (*func)(void *info);
void *info;
/* Notify the sending CPU that the task is done. */
mb();
if (wait) atomic_dec (&data->unfinished_count);
- }
- else if (which == IPI_CPU_STOP) {
+ break;
+ }
+
+ case IPI_CPU_STOP:
halt();
- }
- else {
+
+ default:
printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
this_cpu, which);
+ break;
}
} while (ops);
send_ipi_message(1UL << cpu, IPI_RESCHEDULE);
}
+void
+smp_migrate_task(int cpu, task_t *t)
+{
+#if DEBUG_IPI_MSG
+ if (cpu == hard_smp_processor_id())
+ printk(KERN_WARNING
+ "smp_migrate_task: Sending IPI to self.\n");
+#endif
+ /* Deposit the task, spinning until the migration_task slot is free. */
+ pointer_lock(&migration_task, t, 1);
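+ /* The target CPU clears migration_task in its IPI handler before
+    calling sched_task_migrated, releasing the slot for the next
+    sender. */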
+ send_ipi_message(1UL << cpu, IPI_MIGRATION);
+}
+
void
smp_send_stop(void)
{
- unsigned long to_whom = cpu_present_mask ^ (1UL << smp_processor_id());
+ unsigned long to_whom = cpu_present_mask & ~(1UL << smp_processor_id());
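+ /* Use & ~ rather than the old ^: exclusive-or would set the bit if
+    the current CPU were somehow missing from cpu_present_mask instead
+    of just clearing it. */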
#if DEBUG_IPI_MSG
if (hard_smp_processor_id() != boot_cpu_id)
printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
struct smp_call_struct data;
long timeout;
int num_cpus_to_call;
- long i,j;
data.func = func;
data.info = info;
data.wait = wait;
to_whom &= ~(1L << smp_processor_id());
- for (i = 0, j = 1, num_cpus_to_call = 0; i < NR_CPUS; ++i, j <<= 1)
- if (to_whom & j)
- num_cpus_to_call++;
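+ /* hweight64 is a population count: the number of CPUs addressed by
+    the mask. */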
+ num_cpus_to_call = hweight64(to_whom);
atomic_set(&data.unstarted_count, num_cpus_to_call);
atomic_set(&data.unfinished_count, num_cpus_to_call);
\f
#ifdef CONFIG_DEBUG_SPINLOCK
void
-spin_unlock(spinlock_t * lock)
+_raw_spin_unlock(spinlock_t * lock)
{
mb();
lock->lock = 0;
#endif /* CONFIG_DEBUG_SPINLOCK */
\f
#ifdef CONFIG_DEBUG_RWLOCK
-void write_lock(rwlock_t * lock)
+void _raw_write_lock(rwlock_t * lock)
{
long regx, regy;
int stuck_lock, stuck_reader;
}
}
-void read_lock(rwlock_t * lock)
+void _raw_read_lock(rwlock_t * lock)
{
long regx;
int stuck_lock;