Take advantage of new per-CPU scheme.
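For orientation, here is a minimal sketch of the model this patch moves to, using only what the hunks below define (__per_cpu_data, __per_cpu_offset[], per_cpu(), this_cpu()). The variable and function in the sketch are hypothetical, not part of the patch:

#include <asm/processor.h>	/* per_cpu(), this_cpu(), __per_cpu_offset[] */

/* Illustration only: every __per_cpu_data variable has one copy per CPU.
 * this_cpu(var) names the local copy directly (each CPU presumably maps its
 * own copy at the variable's link address in the PERCPU_ADDR region), while
 * per_cpu(var, cpu) reaches another CPU's copy by adding that CPU's offset. */
static __u64 example_counter __per_cpu_data;	/* hypothetical variable */

static void
example_touch (int cpu)
{
	this_cpu(example_counter)++;		/* the local CPU's copy  */
	per_cpu(example_counter, cpu) = 0;	/* some other CPU's copy */
}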
#include <asm/asmmacro.h>
#include <asm/offsets.h>
#include <asm/signal.h>
+#include <asm/thread_info.h>
#include "../kernel/minstate.h"
GLOBAL_ENTRY(ia32_ret_from_clone)
PT_REGS_UNWIND_INFO(0)
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
/*
* We need to call schedule_tail() to complete the scheduling process.
* Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the
* address of the previously executing task.
*/
br.call.sptk.many rp=ia64_invoke_schedule_tail
-.ret1: adds r2=IA64_TASK_PTRACE_OFFSET,r13
+.ret1:
+#endif
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
;;
- ld8 r2=[r2]
+ ld4 r2=[r2]
;;
mov r8=0
- tbit.nz p6,p0=r2,PT_SYSCALLTRACE_BIT
+ tbit.nz p6,p0=r2,TIF_SYSCALL_TRACE
(p6) br.cond.spnt .ia32_strace_check_retval
;; // prevent RAW on r8
END(ia32_ret_from_clone)
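In C terms, the rewritten test above does roughly the following; this is a sketch built from the TI_FLAGS/IA64_TASK_SIZE offsets the assembly uses, and the helper name is hypothetical:

#include <asm/offsets.h>	/* constants used by the assembly (IA64_TASK_SIZE) */
#include <asm/thread_info.h>	/* TI_FLAGS, TIF_SYSCALL_TRACE */

/* r13 holds the task pointer; on ia64 the thread_info sits immediately after
 * the task_struct, so its flags word lives at r13 + IA64_TASK_SIZE + TI_FLAGS.
 * The old code tested the PT_SYSCALLTRACE bit of task->ptrace instead. */
static inline int
needs_syscall_trace (struct task_struct *task)
{
	struct thread_info *ti = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);

	return (ti->flags & (1 << TIF_SYSCALL_TRACE)) != 0;
}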
shr.u r18=r19,16 // get byte size of existing "dirty" partition
;;
mov r16=ar.bsp // get existing backing store pointer
- movl r17=PERCPU_ADDR+IA64_CPU_PHYS_STACKED_SIZE_P8_OFFSET
+ movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
;;
ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
(pKern) br.cond.dpnt skip_rbs_switch
br.cond.sptk ia64_leave_kernel
END(handle_syscall_error)
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
/*
* Invoke schedule_tail(task) while preserving in0-in7, which may be needed
* in case a system call gets restarted.
br.ret.sptk.many rp
END(ia64_invoke_schedule_tail)
+#endif /* CONFIG_SMP || CONFIG_PREEMPT */
+
#if __GNUC__ < 3
/*
#include <asm/processor.h>
# ifndef CONFIG_NUMA
-EXPORT_SYMBOL(_cpu_data);
+EXPORT_SYMBOL(cpu_info);
# endif
EXPORT_SYMBOL(kernel_thread);
mov r3=255
adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
- adds r2=IA64_TASK_PTRACE_OFFSET,r13 // r2 = &current->ptrace
;;
cmp.geu p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ?
movl r16=sys_call_table
if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
pfm_save_regs(task);
- if (local_cpu_data->pfm_syst_wide) pfm_syst_wide_update_task(task, 0);
+# ifdef CONFIG_SMP
+ if (local_cpu_data->pfm_syst_wide)
+ pfm_syst_wide_update_task(task, 0);
+# endif
#endif
if (IS_IA32_PROCESS(ia64_task_regs(task)))
if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
pfm_load_regs(task);
+# ifdef CONFIG_SMP
if (local_cpu_data->pfm_syst_wide) pfm_syst_wide_update_task(task, 1);
+# endif
#endif
if (IS_IA32_PROCESS(ia64_task_regs(task)))
extern char _end;
-#ifdef CONFIG_NUMA
- struct cpuinfo_ia64 *boot_cpu_data;
-#else
- struct cpuinfo_ia64 _cpu_data[NR_CPUS] __attribute__ ((section ("__special_page_section")));
-#endif
+unsigned long __per_cpu_offset[NR_CPUS];
+struct cpuinfo_ia64 cpu_info __per_cpu_data;
+unsigned long ia64_phys_stacked_size_p8;
unsigned long ia64_cycles_per_usec;
struct ia64_boot_param *ia64_boot_param;
struct screen_info screen_info;
c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
}
+void
+setup_per_cpu_areas (void)
+{
+ /* start_kernel() requires this... */
+}
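The empty hook above only satisfies the generic boot code; the assumption (outside this patch) is that start_kernel() calls it unconditionally, while the real per-CPU setup on ia64 happens later in cpu_init(), once per CPU:

/*
 * Rough ordering this stub relies on (the generic call site is assumed,
 * not shown in this patch):
 *
 *	start_kernel()
 *	    setup_per_cpu_areas();	-- no-op here
 *	    ...
 *	    cpu_init();			-- boot CPU: allocate + copy .data.percpu
 *
 *	secondary CPU bringup
 *	    cpu_init();			-- ditto, once per additional CPU
 */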
+
/*
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier', nothing should get across.
void
cpu_init (void)
{
+ extern char __per_cpu_start[], __phys_per_cpu_start[], __per_cpu_end[];
extern void __init ia64_mmu_init (void *);
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
- struct cpuinfo_ia64 *my_cpu_data;
-#ifdef CONFIG_NUMA
- int cpu, order;
+ struct cpuinfo_ia64 *my_cpu_info;
+ void *my_cpu_data;
+ int cpu = smp_processor_id();
- /*
- * If NUMA is configured, the cpu_data array is not preallocated. The boot cpu
- * allocates entries for every possible cpu. As the remaining cpus come online,
- * they reallocate a new cpu_data structure on their local node. This extra work
- * is required because some boot code references all cpu_data structures
- * before the cpus are actually started.
- */
- if (!boot_cpu_data) {
- my_cpu_data = alloc_bootmem_pages_node(NODE_DATA(numa_node_id()),
- sizeof(struct cpuinfo_ia64));
- boot_cpu_data = my_cpu_data;
- my_cpu_data->cpu_data[0] = my_cpu_data;
- for (cpu = 1; cpu < NR_CPUS; ++cpu)
- my_cpu_data->cpu_data[cpu]
- = alloc_bootmem_pages_node(NODE_DATA(numa_node_id()),
- sizeof(struct cpuinfo_ia64));
- for (cpu = 1; cpu < NR_CPUS; ++cpu)
- memcpy(my_cpu_data->cpu_data[cpu]->cpu_data,
- my_cpu_data->cpu_data, sizeof(my_cpu_data->cpu_data));
- } else {
- order = get_order(sizeof(struct cpuinfo_ia64));
- my_cpu_data = page_address(alloc_pages_node(numa_node_id(), GFP_KERNEL, order));
- memcpy(my_cpu_data, boot_cpu_data->cpu_data[smp_processor_id()],
- sizeof(struct cpuinfo_ia64));
- __free_pages(virt_to_page(boot_cpu_data->cpu_data[smp_processor_id()]),
- order);
- for (cpu = 0; cpu < NR_CPUS; ++cpu)
- boot_cpu_data->cpu_data[cpu]->cpu_data[smp_processor_id()] = my_cpu_data;
- }
-#else
- my_cpu_data = cpu_data(smp_processor_id());
-#endif
+ my_cpu_data = alloc_bootmem_pages(__per_cpu_end - __per_cpu_start);
+ memcpy(my_cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
+
+ __per_cpu_offset[cpu] = (char *) my_cpu_data - __per_cpu_start;
+
+ my_cpu_info = my_cpu_data + ((char *) &cpu_info - __per_cpu_start);
/*
* We can't pass "local_cpu_data" to identify_cpu() because we haven't called
* depends on the data returned by identify_cpu(). We break the dependency by
* accessing cpu_data() the old way, through identity mapped space.
*/
- identify_cpu(my_cpu_data);
+ identify_cpu(my_cpu_info);
/* Clear the stack memory reserved for pt_regs: */
memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
printk ("cpu_init: PAL RSE info failed, assuming 96 physical stacked regs\n");
num_phys_stacked = 96;
}
- local_cpu_data->phys_stacked_size_p8 = num_phys_stacked*8 + 8;
-
+ /* size of physical stacked register partition plus 8 bytes: */
+ ia64_phys_stacked_size_p8 = num_phys_stacked*8 + 8;
platform_cpu_init();
}
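A worked example of the offset arithmetic in cpu_init() above, with an invented offset purely for illustration:

/*
 * Suppose this CPU's bootmem copy (my_cpu_data) ends up 0x3ff0000 bytes
 * above the link address of the template:
 *
 *	__per_cpu_offset[cpu] = my_cpu_data - __per_cpu_start = 0x3ff0000
 *
 * Then for any per-CPU variable, e.g. cpu_info:
 *
 *	&per_cpu(cpu_info, cpu) == (void *) &cpu_info + 0x3ff0000
 *
 * which lands inside this CPU's private copy, exactly where the memcpy()
 * above placed the template's image of cpu_info.
 */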
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
+/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
+static __u64 ipi_operation __per_cpu_data ____cacheline_aligned;
+
static void
stop_this_cpu (void)
{
handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
{
int this_cpu = smp_processor_id();
- unsigned long *pending_ipis = &local_cpu_data->ipi_operation;
+ unsigned long *pending_ipis = &ipi_operation;
unsigned long ops;
/* Count this now; we may make a call that never returns. */
static inline void
send_IPI_single (int dest_cpu, int op)
{
- set_bit(op, &cpu_data(dest_cpu)->ipi_operation);
+ set_bit(op, &per_cpu(ipi_operation, dest_cpu));
platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}
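The per-CPU ipi_operation bitmask replaces the old cpuinfo_ia64.ipi_operation field: the sender marks the operation in the destination CPU's copy, and the interrupt handler on that CPU consumes its own copy. A condensed sketch of the round trip (not a literal excerpt):

/* sender, running on any CPU: */
set_bit(IPI_CPU_STOP, &per_cpu(ipi_operation, dest_cpu));
platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);

/* receiver, inside handle_IPI() on dest_cpu: */
unsigned long *pending_ipis = &ipi_operation;	/* this_cpu() form: the local copy */
/* ... atomically fetch and clear *pending_ipis, then act on each set bit ... */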
print " * This file was generated by arch/ia64/tools/print_offsets.awk."
print " *"
print " */"
- #
- # This is a cheesy hack. Make sure that
- # PT_PTRACED == 1<<PT_PTRACED_BIT.
- #
- print "#define PT_PTRACED_BIT 0"
- print "#define PT_SYSCALLTRACE_BIT 1"
}
# look for .tab:
{ "SIGFRAME_SIZE", sizeof (struct sigframe) },
{ "UNW_FRAME_INFO_SIZE", sizeof (struct unw_frame_info) },
{ "", 0 }, /* spacer */
- { "IA64_TASK_PTRACE_OFFSET", offsetof (struct task_struct, ptrace) },
- { "IA64_TASK_THREAD_OFFSET", offsetof (struct task_struct, thread) },
{ "IA64_TASK_THREAD_KSP_OFFSET", offsetof (struct task_struct, thread.ksp) },
-#ifdef CONFIG_PERFMON
- { "IA64_TASK_PFM_OVFL_BLOCK_RESET_OFFSET",offsetof(struct task_struct, thread.pfm_ovfl_block_reset) },
-#endif
- { "IA64_TASK_PID_OFFSET", offsetof (struct task_struct, pid) },
- { "IA64_TASK_MM_OFFSET", offsetof (struct task_struct, mm) },
{ "IA64_PT_REGS_CR_IPSR_OFFSET", offsetof (struct pt_regs, cr_ipsr) },
{ "IA64_PT_REGS_CR_IIP_OFFSET", offsetof (struct pt_regs, cr_iip) },
{ "IA64_PT_REGS_CR_IFS_OFFSET", offsetof (struct pt_regs, cr_ifs) },
{ "IA64_SIGFRAME_SIGCONTEXT_OFFSET", offsetof (struct sigframe, sc) },
{ "IA64_CLONE_VFORK", CLONE_VFORK },
{ "IA64_CLONE_VM", CLONE_VM },
- { "IA64_CPU_IRQ_COUNT_OFFSET", offsetof (struct cpuinfo_ia64, irq_stat.f.irq_count) },
- { "IA64_CPU_BH_COUNT_OFFSET", offsetof (struct cpuinfo_ia64, irq_stat.f.bh_count) },
- { "IA64_CPU_PHYS_STACKED_SIZE_P8_OFFSET",offsetof (struct cpuinfo_ia64, phys_stacked_size_p8)},
};
static const char *tabs = "\t\t\t\t\t\t\t\t\t\t";
printf ("/*\n * DO NOT MODIFY\n *\n * This file was generated by "
"arch/ia64/tools/print_offsets.\n *\n */\n\n");
- /* This is stretching things a bit, but entry.S needs the bit number
- for PT_PTRACED and it can't include <linux/sched.h> so this seems
- like a reasonably solution. At least the code won't break in
- subtle ways should PT_PTRACED ever change. Ditto for
- PT_TRACESYS_BIT. */
- printf ("#define PT_PTRACED_BIT\t\t\t%u\n", ffs (PT_PTRACED) - 1);
-#if 0
- printf ("#define PT_SYSCALLTRACE_BIT\t\t\t%u\n\n", ffs (PT_SYSCALLTRACE) - 1);
-#endif
-
for (i = 0; i < sizeof (tab) / sizeof (tab[0]); ++i)
{
if (tab[i].name[0] == '\0')
#include <linux/config.h>
+#include <asm/cache.h>
#include <asm/ptrace.h>
#include <asm/system.h>
machvec_end = .;
#endif
- __start___ksymtab = .; /* Kernel symbol table */
- __ksymtab : AT(ADDR(__ksymtab) - PAGE_OFFSET)
- { *(__ksymtab) }
- __stop___ksymtab = .;
-
- __start___kallsyms = .; /* All kernel symbols for debugging */
- __kallsyms : AT(ADDR(__kallsyms) - PAGE_OFFSET)
- { *(__kallsyms) }
- __stop___kallsyms = .;
-
/* Unwind info & table: */
. = ALIGN(8);
.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - PAGE_OFFSET)
.opd : AT(ADDR(.opd) - PAGE_OFFSET)
{ *(.opd) }
- /* Per-cpu data: */
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
-
/* Initialization code and data: */
. = ALIGN(PAGE_SIZE);
.data.init_task : AT(ADDR(.data.init_task) - PAGE_OFFSET)
{ *(.data.init_task) }
- .data.page_aligned : AT(ADDR(.data.page_aligned) - PAGE_OFFSET)
- { *(.data.idt) }
-
- . = ALIGN(64);
+ . = ALIGN(SMP_CACHE_BYTES);
.data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - PAGE_OFFSET)
{ *(.data.cacheline_aligned) }
.kstrtab : AT(ADDR(.kstrtab) - PAGE_OFFSET)
{ *(.kstrtab) }
+ /* Per-cpu data: */
+ . = ALIGN(PAGE_SIZE);
+ __phys_per_cpu_start = .;
+ .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - PAGE_OFFSET)
+ {
+ __per_cpu_start = .;
+ *(.data.percpu)
+ __per_cpu_end = .;
+ }
+ . = __phys_per_cpu_start + 4096; /* ensure percpu fits into smallest page size (4KB) */
+
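The two start symbols introduced above play different roles; the following note (an illustration, not part of the linker script) spells out how cpu_init() uses them:

/*
 * __phys_per_cpu_start	  where the .data.percpu template is actually loaded
 *			  (the AT() load address; cpu_init() memcpy's from it)
 * __per_cpu_start/_end	  the section's link addresses at PERCPU_ADDR, which
 *			  is what &var evaluates to for a __per_cpu_data variable
 *
 * cpu_init() copies __per_cpu_end - __per_cpu_start bytes starting at
 * __phys_per_cpu_start into a bootmem buffer and stores
 * (buffer - __per_cpu_start) in __per_cpu_offset[cpu].
 */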
.data : AT(ADDR(.data) - PAGE_OFFSET)
{ *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
{ *(.sbss) *(.scommon) }
.bss : AT(ADDR(.bss) - PAGE_OFFSET)
{ *(.bss) *(COMMON) }
- . = ALIGN(64 / 8);
+
_end = .;
/* Stabs debugging sections. */
/*
* Copyright (C) 1998-2000 Hewlett-Packard Co
- * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
*/
/* Bytes per L1 (data) cache line. */
#include <linux/config.h>
+#include <linux/compiler.h>
+
#include <asm/ptrace.h>
#include <asm/kregs.h>
#include <asm/system.h>
*/
#define IA64_USEC_PER_CYC_SHIFT 41
+#define __HAVE_ARCH_PER_CPU
+
+#define THIS_CPU(var) (var)
+
#ifndef __ASSEMBLY__
#include <linux/threads.h>
#include <asm/unwind.h>
#include <asm/atomic.h>
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+#define per_cpu(var, cpu) (*(__typeof__(&(var))) ((void *) &(var) + __per_cpu_offset[cpu]))
+#define this_cpu(var) (var)
+
/* like above but expressed as bitfields for more efficient access: */
struct ia64_psr {
__u64 reserved0 : 1;
* CPU type, hardware bug flags, and per-CPU state. Frequently used
* state comes earlier:
*/
-struct cpuinfo_ia64 {
+extern struct cpuinfo_ia64 {
/* irq_stat must be 64-bit aligned */
union {
struct {
__u64 irq_and_bh_counts;
} irq_stat;
__u32 softirq_pending;
- __u32 phys_stacked_size_p8; /* size of physical stacked registers + 8 */
__u64 itm_delta; /* # of clock cycles between clock ticks */
__u64 itm_next; /* interval timer mask value to use for next clock tick */
__u64 *pgd_quick;
__u64 prof_multiplier;
__u32 pfm_syst_wide;
__u32 pfm_dcr_pp;
- /* this is written to by *other* CPUs: */
- __u64 ipi_operation ____cacheline_aligned;
-#endif
-#ifdef CONFIG_NUMA
- void *node_directory;
- int numa_node_id;
- struct cpuinfo_ia64 *cpu_data[NR_CPUS];
#endif
- /* Platform specific word. MUST BE LAST IN STRUCT */
- __u64 platform_specific;
-} __attribute__ ((aligned (PAGE_SIZE))) ;
+} cpu_info __per_cpu_data;
/*
* The "local" data pointer. It points to the per-CPU data of the currently executing
* CPU, much like "current" points to the per-task data of the currently executing task.
*/
-#define local_cpu_data ((struct cpuinfo_ia64 *) PERCPU_ADDR)
-
-/*
- * On NUMA systems, cpu_data for each cpu is allocated during cpu_init() & is allocated on
- * the node that contains the cpu. This minimizes off-node memory references. cpu_data
- * for each cpu contains an array of pointers to the cpu_data structures of each of the
- * other cpus.
- *
- * On non-NUMA systems, cpu_data is a static array allocated at compile time. References
- * to the cpu_data of another cpu is done by direct references to the appropriate entry of
- * the array.
- */
-#ifdef CONFIG_NUMA
-# define cpu_data(cpu) local_cpu_data->cpu_data[cpu]
-# define numa_node_id() (local_cpu_data->numa_node_id)
-#else
- extern struct cpuinfo_ia64 _cpu_data[NR_CPUS];
-# define cpu_data(cpu) (&_cpu_data[cpu])
-#endif
+#define local_cpu_data (&this_cpu(cpu_info))
+#define cpu_data(cpu) (&per_cpu(cpu_info, cpu))
extern void identify_cpu (struct cpuinfo_ia64 *);
extern void print_cpu_info (struct cpuinfo_ia64 *);
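With cpu_info now a per-CPU variable, code that used to index the _cpu_data[] array (or the NUMA cpu_data[] pointer table) goes through the two macros above instead. A brief usage sketch; the surrounding context is hypothetical, the fields are real cpuinfo_ia64 members:

/* the local CPU, equivalent to the old PERCPU_ADDR-based local_cpu_data: */
local_cpu_data->itm_next += local_cpu_data->itm_delta;

/* another CPU, replacing the old _cpu_data[cpu] / cpu_data[cpu] indexing: */
if (cpu_data(cpu)->softirq_pending)
	/* ... */ ;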
extern spinlock_t kernel_flag;
-#define kernel_locked() spin_is_locked(&kernel_flag)
+#ifdef CONFIG_SMP
+# define kernel_locked() spin_is_locked(&kernel_flag)
+# define check_irq_holder(cpu) \
+do { \
+ if (global_irq_holder == (cpu)) \
+ BUG(); \
+} while (0)
+#else
+# define kernel_locked() (1)
+# define check_irq_holder(cpu) do { } while (0)
+#endif
/*
* Release global kernel lock and global interrupt lock
*/
-static __inline__ void
-release_kernel_lock(struct task_struct *task, int cpu)
-{
- if (unlikely(task->lock_depth >= 0)) {
- spin_unlock(&kernel_flag);
- if (global_irq_holder == (cpu)) \
- BUG(); \
- }
-}
+#define release_kernel_lock(task, cpu) \
+do { \
+ if (unlikely(task->lock_depth >= 0)) { \
+ spin_unlock(&kernel_flag); \
+ check_irq_holder(cpu); \
+ } \
+} while (0)
/*
* Re-acquire the kernel lock
*/
-static __inline__ void
-reacquire_kernel_lock(struct task_struct *task)
-{
- if (unlikely(task->lock_depth >= 0))
- spin_lock(&kernel_flag);
-}
+#define reacquire_kernel_lock(task) \
+do { \
+ if (unlikely(task->lock_depth >= 0)) \
+ spin_lock(&kernel_flag); \
+} while (0)
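release_kernel_lock() and reacquire_kernel_lock() are now statement macros rather than inline functions, so a caller uses them as plain statements. The call site sketched here is hypothetical (the context-switch path is the usual user), not taken from this patch:

	/* hypothetical context-switch path */
	release_kernel_lock(prev, smp_processor_id());	/* drop the BKL if prev held it */
	/* ... pick the next task and switch to it ... */
	reacquire_kernel_lock(current);			/* re-take the BKL afterwards */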
/*
* Getting the big kernel lock.