From 89eabf146605aaef62d9b8714a4128f300467858 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:10:16 -0500 Subject: [PATCH] Import 1.3.31 --- CREDITS | 3 +- Documentation/SMP.txt | 38 ++ Makefile | 2 +- arch/alpha/kernel/entry.S | 4 +- arch/alpha/kernel/irq.c | 12 +- arch/alpha/kernel/lca.c | 3 + arch/alpha/kernel/time.c | 4 +- arch/i386/Makefile | 1 + arch/i386/boot/compressed/head.S | 29 +- arch/i386/boot/setup.S | 1 + arch/i386/config.in | 2 + arch/i386/kernel/Makefile | 36 +- arch/i386/kernel/entry.S | 161 +++++- arch/i386/kernel/head.S | 56 +- arch/i386/kernel/hexify.c | 31 ++ arch/i386/kernel/irq.c | 60 +- arch/i386/kernel/process.c | 33 +- arch/i386/kernel/setup.c | 97 +++- arch/i386/kernel/smp.c | 918 +++++++++++++++++++++++++++++++ arch/i386/kernel/time.c | 266 ++++++--- arch/i386/kernel/trampoline.S | 75 +++ arch/i386/kernel/trampoline32.S | 21 + arch/i386/kernel/traps.c | 56 +- arch/i386/mm/init.c | 28 + drivers/block/genhd.c | 11 +- drivers/block/ide.c | 3 +- drivers/block/triton.c | 8 +- drivers/char/keyboard.c | 5 + drivers/char/mem.c | 36 +- drivers/char/msbusmouse.c | 11 +- drivers/char/psaux.c | 2 +- drivers/char/random.c | 21 +- drivers/net/3c501.c | 5 +- drivers/net/3c503.c | 68 ++- drivers/net/3c503.h | 6 + drivers/net/3c509.c | 4 +- drivers/net/8390.c | 48 +- drivers/net/8390.h | 21 +- drivers/net/CONFIG | 5 +- drivers/net/ac3200.c | 39 +- drivers/net/e2100.c | 53 +- drivers/net/eql.c | 2 +- drivers/net/hp-plus.c | 64 ++- drivers/net/hp.c | 38 +- drivers/net/loopback.c | 1 + drivers/net/ne.c | 86 ++- drivers/net/plip.c | 16 +- drivers/net/ppp.c | 2 +- drivers/net/smc-ultra.c | 51 +- drivers/net/tulip.c | 2 +- drivers/net/wd.c | 57 +- drivers/scsi/53c7,8xx.c | 2 +- drivers/scsi/eata_dma.c | 7 +- drivers/scsi/eata_dma_proc.c | 8 +- drivers/scsi/eata_generic.h | 3 + drivers/scsi/eata_pio.c | 5 +- drivers/scsi/scsi_proc.c | 4 +- drivers/scsi/sd.c | 2 +- drivers/scsi/sr_ioctl.c | 24 + drivers/scsi/st.c | 31 +- 
drivers/sound/sound_switch.c | 4 +- fs/binfmt_elf.c | 4 +- fs/ext2/super.c | 2 +- fs/read_write.c | 90 +++ include/asm-alpha/checksum.h | 11 + include/asm-alpha/io.h | 8 + include/asm-alpha/param.h | 9 +- include/asm-alpha/unistd.h | 4 +- include/asm-i386/bitops.h | 17 +- include/asm-i386/checksum.h | 23 +- include/asm-i386/delay.h | 9 + include/asm-i386/io.h | 6 + include/asm-i386/irq.h | 171 ++++++ include/asm-i386/locks.h | 133 +++++ include/asm-i386/page.h | 10 + include/asm-i386/smp.h | 7 +- include/asm-i386/system.h | 56 +- include/asm-i386/unistd.h | 2 + include/linux/cdrom.h | 12 +- include/linux/igmp.h | 1 + include/linux/mroute.h | 51 +- include/linux/proc_fs.h | 2 + include/linux/sched.h | 17 +- include/linux/smp.h | 12 + include/linux/tasks.h | 6 +- include/net/netlink.h | 13 + init/main.c | 76 ++- kernel/exit.c | 11 +- kernel/fork.c | 16 +- kernel/ksyms.c | 5 +- kernel/sched.c | 60 +- kernel/sys.c | 2 +- kernel/time.c | 2 + net/Changes | 23 +- net/Makefile | 2 +- net/ipv4/Makefile | 6 +- net/ipv4/icmp.c | 47 +- net/ipv4/ip.c | 285 +++++++--- net/ipv4/ip_fw.c | 11 +- net/ipv4/ipmr.c | 589 +++++++++++++++++++- net/ipv4/tcp.c | 2 + net/netlink.c | 250 +++++++++ net/socket.c | 12 +- 103 files changed, 4212 insertions(+), 555 deletions(-) create mode 100644 Documentation/SMP.txt create mode 100644 arch/i386/kernel/hexify.c create mode 100644 arch/i386/kernel/smp.c create mode 100644 arch/i386/kernel/trampoline.S create mode 100644 arch/i386/kernel/trampoline32.S create mode 100644 include/asm-i386/locks.h create mode 100644 include/net/netlink.h create mode 100644 net/netlink.c diff --git a/CREDITS b/CREDITS index 704d44de7365..9dfb3f47b621 100644 --- a/CREDITS +++ b/CREDITS @@ -648,7 +648,7 @@ S: East Brunswick, New Jersey 08816 S: USA N: Rick Miller -E: rick@discus.mil.wi.us +E: rick@digalogsys.com D: Linux Device Registrar (Major/minor numbers), "au-play", "bwBASIC" S: S78 W16203 Woods Road S: Muskego, Wisconsin 53150 @@ -753,7 +753,6 @@ S: Germany N: 
Ken Pizzini E: ken@halcyon.com D: CDROM driver "sonycd535" (Sony CDU-535/531) -S: N: Frederic Potter E: Frederic.Potter@masi.ibp.fr diff --git a/Documentation/SMP.txt b/Documentation/SMP.txt new file mode 100644 index 000000000000..60d5ccf8f49b --- /dev/null +++ b/Documentation/SMP.txt @@ -0,0 +1,38 @@ + +SMP support for Linux with up to 32 processors using the Intel MP +specification. + +WARNING: + This is experimental. Back up your disks first. Build only +with gcc2.5.8. + + +To fix: + +o Fix sys_idle to exit/enter kernel state and do hlt's. +o Fix scheduler decisions to reschedule. Per cpu reschedule ? +o Scheduler ignores stick to CPU advantage. Critical for P6! +o FPU state save/restore - fault any process on FPU and do a + restore on the fault each context switch, do a save if we + faulted that run of the task when we switch away. [DONE] +o Clean up message pass. +o Test for B stepping processors. +o Clean up processor specific/independant split. +o Document it all. [PARTLY DONE] +o Find the exception/crash bug. +o Halt other CPU's on reset/panic doesn't always work. +o Dont waste page at 4K - dont need it now.(watch the GDT code). +o Dump bootup pages once booted somehow. +o Verify message pass safe for >2 CPU's - should be now it + is atomic lock based. +o Dont schedule switches between idle tasks (if current->pid==0 && + next->pid==0 && next!=current) [IN] +o Clean up warnings/volatiles. +o Send CTRL-ALT-DEL to pid 1 not task[1]! [IN] +o Fix load_TR() for non contiguous processor ids +o Iterate over the slave timer requests if one is lost (keep a count per cpu) +o Distribute irq's (locking present just needs the 82489 to be asked + nicely). +o 486 startup code. +o How to handle mixed FPU/non FPU processors. 
+o Support 4Mb page mode again diff --git a/Makefile b/Makefile index 9b31409960aa..4c6b5f6ff15c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 1 PATCHLEVEL = 3 -SUBLEVEL = 30 +SUBLEVEL = 31 ARCH = i386 diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 9359095e17ad..ac0bb78214bb 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -590,7 +590,7 @@ sys_call_table: .quad sys_setsockopt, sys_listen, do_entSys, do_entSys, do_entSys .quad do_entSys, sys_sigsuspend, do_entSys, do_entSys, do_entSys .quad do_entSys, sys_gettimeofday, sys_getrusage, sys_getsockopt, do_entSys - .quad do_entSys, do_entSys, sys_settimeofday, sys_fchown, sys_fchmod + .quad sys_readv, sys_writev, sys_settimeofday, sys_fchown, sys_fchmod .quad sys_recvfrom, sys_setreuid, sys_setregid, sys_rename, sys_truncate .quad sys_ftruncate, do_entSys, sys_setgid, sys_sendto, sys_shutdown .quad sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, do_entSys @@ -629,6 +629,6 @@ sys_call_table: /* linux-specific system calls start at 300 */ /*300*/ .quad sys_bdflush, sys_sethae, sys_mount, sys_adjtimex, sys_swapoff .quad sys_getdents, sys_create_module, sys_init_module, sys_delete_module, sys_get_kernel_syms - .quad sys_syslog, sys_reboot, sys_clone, do_entSys, do_entSys + .quad sys_syslog, sys_reboot, sys_clone, sys_uselib, do_entSys .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 82dff0f79430..e51660f15b99 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -24,6 +24,8 @@ #include #include +extern void timer_interrupt(struct pt_regs * regs); + static unsigned char cache_21 = 0xff; static unsigned char cache_A1 = 0xff; @@ -223,6 +225,9 @@ int request_irq(unsigned int irq, void (*handler)(int, struct pt_regs *), if (irq >= NR_IRQS) return -EINVAL; + /* don't accept requests for irq #0 */ + if (!irq) + return -EINVAL; action = irq + irq_action; if 
(action->handler) return -EBUSY; @@ -282,12 +287,16 @@ static void unexpected_irq(int irq, struct pt_regs * regs) if (irq_action[i].handler) printk("[%s:%d] ", irq_action[i].name, i); printk("\n"); +#if defined(CONFIG_ALPHA_JENSEN) printk("64=%02x, 60=%02x, 3fa=%02x 2fa=%02x\n", inb(0x64), inb(0x60), inb(0x3fa), inb(0x2fa)); outb(0x0c, 0x3fc); outb(0x0c, 0x2fc); outb(0,0x61); outb(0,0x461); +#elif defined(CONFIG_ALPHA_NONAME) + printk("61=%02x, 64=%02x, 60=%02x\n", inb(0x61), inb(0x64), inb(0x60)); +#endif } static inline void handle_irq(int irq, struct pt_regs * regs) @@ -563,8 +572,7 @@ asmlinkage void do_entInt(unsigned long type, unsigned long vector, unsigned lon printk("Interprocessor interrupt? You must be kidding\n"); break; case 1: - /* timer interrupt.. */ - handle_irq(0, &regs); + timer_interrupt(&regs); return; case 2: machine_check(vector, la_ptr, &regs); diff --git a/arch/alpha/kernel/lca.c b/arch/alpha/kernel/lca.c index 3ae016bd1faf..3ed4ad0a4d21 100644 --- a/arch/alpha/kernel/lca.c +++ b/arch/alpha/kernel/lca.c @@ -290,6 +290,9 @@ void lca_machine_check (unsigned long vector, unsigned long la, struct pt_regs * printk("esr=%lx, ear=%lx, ioc_stat0=%lx, ioc_stat1=%lx\n", *(unsigned long*)LCA_MEM_ESR, *(unsigned long*)LCA_MEM_EAR, *(unsigned long*)LCA_IOC_STAT0, *(unsigned long*)LCA_IOC_STAT1); +#ifdef CONFIG_ALPHA_NONAME + printk("NMMI status & control (0x61)=%02x\n", inb(0x61)); +#endif } #endif /* CONFIG_ALPHA_LCA */ diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index ba425ff21966..7352a6cd347f 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -32,7 +32,7 @@ static int set_rtc_mmss(unsigned long); * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static void timer_interrupt(int irq, struct pt_regs * regs) +void timer_interrupt(struct pt_regs * regs) { /* last time the cmos clock got updated */ static long last_rtc_update=0; @@ -130,8 +130,6 @@ void 
time_init(void) year += 100; xtime.tv_sec = mktime(year, mon, day, hour, min, sec); xtime.tv_usec = 0; - if (request_irq(TIMER_IRQ, timer_interrupt, 0, "timer") != 0) - panic("Could not allocate timer IRQ!"); } /* diff --git a/arch/i386/Makefile b/arch/i386/Makefile index d931eeb51d4b..0b6d7f378ad0 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -91,6 +91,7 @@ install: vmlinux archclean: @$(MAKEBOOT) clean + $(MAKE) -C arch/$(ARCH)/kernel clean archdep: @$(MAKEBOOT) dep diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index b0d26cb5d809..14bf161ab6a3 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -9,7 +9,12 @@ * * NOTE!!! Startup happens at absolute address 0x00001000, which is also where * the page directory will exist. The startup code will be overwritten by - * the page directory. + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * In SMP mode we keep this page safe. Really we ought to shuffle things and + * put the trampoline here. - AC. An SMP trampoline enters with %cx holding + * the stack base. * * Page 0 is deliberately kept safe, since System Management Mode code in * laptops may need to access the BIOS data stored there. 
This is also @@ -19,10 +24,12 @@ .text #define __ASSEMBLY__ +#include #include #include .globl startup_32 + startup_32: cld cli @@ -31,6 +38,24 @@ startup_32: mov %ax,%es mov %ax,%fs mov %ax,%gs +#ifdef CONFIG_SMP + orw %bx,%bx # What state are we in BX=1 for SMP + # 0 for boot + jz 2f # Initial boot + +/* + * We are trampolining an SMP processor + */ + mov %ax,%ss + xorl %eax,%eax # Back to 0 + mov %cx,%ax # SP low 16 bits + movl %eax,%esp + pushl 0 # Clear NT + popfl + ljmp $(KERNEL_CS), $0x100000 # Into C and sanity + +2: +#endif lss SYMBOL_NAME(stack_start),%esp xorl %eax,%eax 1: incl %eax # check that A20 really IS enabled @@ -58,4 +83,6 @@ startup_32: * Do the decompression, and jump to the new kernel.. */ call SYMBOL_NAME(decompress_kernel) + xorl %ebx,%ebx ljmp $(KERNEL_CS), $0x100000 + diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index 9a4533a0267f..d12951a79e50 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -344,6 +344,7 @@ end_move: lmsw ax ! This is it! jmp flush_instr flush_instr: + mov bx,#0 ! Flag to indicate a boot jmpi 0x1000,KERNEL_CS ! jmp offset 1000 of segment 0x10 (cs) ! 
This routine checks that the keyboard command queue is empty diff --git a/arch/i386/config.in b/arch/i386/config.in index ff92a99d8d41..8a4dad907e90 100644 --- a/arch/i386/config.in +++ b/arch/i386/config.in @@ -40,6 +40,7 @@ fi #if [ "$CONFIG_M586" = "n" ]; then bool 'Use -m486 flag for 486-specific optimizations' CONFIG_M486 y #fi +bool 'SMP Kernel (experimental - gcc2.5.8 only: see Documentation/SMP.txt)' CONFIG_SMP n comment 'Loadable module support' bool 'Set version information on all symbols for modules' CONFIG_MODVERSIONS n @@ -74,6 +75,7 @@ bool 'Amateur Radio AX.25 Level 2' CONFIG_AX25 n if [ "$CONFIG_AX25" = "y" ]; then bool 'Amateur Radio NET/ROM' CONFIG_NETROM n fi +bool 'Kernel/User network link driver(ALPHA)' CONFIG_NETLINK n fi comment 'SCSI support' diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 0996699f8d7a..71333b47dddd 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -7,21 +7,51 @@ # # Note 2! The CFLAGS definitions are now in the main makefile... 
+AS86 =as86 -0 -a +AS386 =as86 -3 +LD86 =ld86 -0 + + #.S.s: # $(CPP) -D__ASSEMBLY__ -traditional $< -o $*.s .S.o: $(CC) -D__ASSEMBLY__ -traditional -c $< -o $*.o all: kernel.o head.o + O_TARGET := kernel.o O_OBJS := process.o signal.o entry.o traps.o irq.o vm86.o bios32.o \ ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o - -#head.o: head.s +ifdef CONFIG_SMP +O_OBJS += smp.o +endif head.o: head.S $(TOPDIR)/include/linux/tasks.h $(CC) -D__ASSEMBLY__ -traditional -c $*.S -o $*.o -# $(CPP) -traditional -o $*.s $< + +hexify: + $(HOSTCC) hexify.c -o hexify + +trampoline.hex: trampoline hexify + (dd if=trampoline bs=1 skip=32 | ./hexify >trampoline.hex ) + +trampoline: trampoline.o trampoline32.o + $(LD86) -s -o $@ trampoline.o trampoline32.o + +trampoline.o: trampoline.s + $(AS86) -o $@ $< + +trampoline32.o: trampoline32.s + $(AS386) -o $@ $< + +trampoline.s: trampoline.S $(CONFIGURE) $(TOPDIR)/include/linux/config.h Makefile + $(CPP) -traditional $< -o $@ + +trampoline32.s: trampoline32.S $(CONFIGURE) $(TOPDIR)/include/linux/config.h Makefile + $(CPP) -traditional $< -o $@ + +clean: + rm -f trampoline hexify include $(TOPDIR)/Rules.make diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index f09a507d288c..53cb453419aa 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -41,8 +41,11 @@ */ #include +#include #include #include +#define ASSEMBLY +#include EBX = 0x00 ECX = 0x04 @@ -102,13 +105,89 @@ ENOSYS = 38 movl $(USER_DS),%edx; \ mov %dx,%fs; +#ifdef CONFIG_SMP + +#define GET_PROCESSOR_ID \ + movl SYMBOL_NAME(apic_reg), %edx; \ + movl 32(%edx), %eax;\ + movl %eax,SYMBOL_NAME(apic_retval); \ + shrl $24,%eax; \ + andb $0x0F,%al; + +/* + * Get the processor ID multiplied by 4 + */ + +#define GET_PROCESSOR_OFFSET(x) \ + movl SYMBOL_NAME(apic_reg), x ; \ + movl 32( x ), x ; \ + shrl $22, x ; \ + andl $0x3C, x ; + +/* macro LEAVE_KERNEL decrements kernel_counter and resets kernel_flag and + saves processor variables if zero */ +#define 
LEAVE_KERNEL \ + pushfl; \ + cli; \ + decl SYMBOL_NAME(syscall_count); \ + decl SYMBOL_NAME(kernel_counter); \ + jnz 1f; \ + movb $(NO_PROC_ID), SYMBOL_NAME(active_kernel_processor); \ + lock; \ + btrl $0, SYMBOL_NAME(kernel_flag); \ +1: popfl; + +/* macro ENTER_KERNEL waits for entering the kernel, increments + kernel_counter, and reloads the processor variables if necessary + uses : %eax, %edx (pushed and popped) + + Note: We go to great pains to minimise the number of locked operations. + We want to spin without locking, and lock when we attempt an update. + The pentium has a MESI cache so the spin without lock will exit when + another CPU write invalidates our cache, and the lock is avoided when + possible so we don't play ping-pong games with the cache line. + +*/ + +#define ENTER_KERNEL \ + pushl %eax; \ + pushl %edx; \ + pushfl; \ + cli; \ + GET_PROCESSOR_ID \ +1: lock; \ + btsl $0, SYMBOL_NAME(kernel_flag); \ + jnc 3f; \ + cmpb SYMBOL_NAME(active_kernel_processor), %al; \ + je 4f; \ +2: incl SYMBOL_NAME(smp_spins); \ + btl %al, SYMBOL_NAME(smp_invalidate_needed); \ + jnc 5f; \ + lock; \ + btrl %al, SYMBOL_NAME(smp_invalidate_needed); \ + jnc 5f; \ + movl %cr3,%edx; \ + movl %edx,%cr3; \ +5: btl $0, SYMBOL_NAME(kernel_flag); \ + jc 2b; \ + jmp 1b; \ +3: movb %al, SYMBOL_NAME(active_kernel_processor); \ +4: incl SYMBOL_NAME(kernel_counter); \ + incl SYMBOL_NAME(syscall_count); \ + popfl; \ + popl %edx; \ + popl %eax; + + #define RESTORE_ALL \ cmpw $(KERNEL_CS),CS(%esp); \ je 1f; \ - movl SYMBOL_NAME(current),%eax; \ + GET_PROCESSOR_OFFSET(%edx) \ + movl SYMBOL_NAME(current_set)(,%edx), %eax ; ; \ movl dbgreg7(%eax),%ebx; \ movl %ebx,%db7; \ -1: popl %ebx; \ +1: LEAVE_KERNEL \ + popl %ebx; \ popl %ecx; \ popl %edx; \ popl %esi; \ @@ -122,10 +201,38 @@ ENOSYS = 38 addl $4,%esp; \ iret +#else + +#define RESTORE_ALL \ + cmpw $(KERNEL_CS),CS(%esp); \ + je 1f; \ + movl SYMBOL_NAME(current_set),%eax; \ + movl dbgreg7(%eax),%ebx; \ + movl %ebx,%db7; \ +1: \ + popl 
%ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ + pop %ds; \ + pop %es; \ + pop %fs; \ + pop %gs; \ + addl $4,%esp; \ + iret +#endif + + ENTRY(lcall7) pushfl # We get a different stack layout with call gates, pushl %eax # which has to be cleaned up later.. SAVE_ALL +#ifdef CONFIG_SMP + ENTER_KERNEL +#endif movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. movl CS(%esp),%edx # this is eip.. movl EFLAGS(%esp),%ecx # and this is cs.. @@ -133,7 +240,12 @@ ENTRY(lcall7) movl %edx,EIP(%esp) # Now we move them to their "normal" places movl %ecx,CS(%esp) # movl %esp,%eax - movl SYMBOL_NAME(current),%edx +#ifdef CONFIG_SMP + GET_PROCESSOR_OFFSET(%edx) # Processor offset into edx + movl SYMBOL_NAME(current_set)(,%edx),%edx +#else + movl SYMBOL_NAME(current_set),%edx +#endif pushl %eax movl exec_domain(%edx),%edx # Get the execution domain movl 4(%edx),%edx # Get the lcall7 handler for the domain @@ -153,18 +265,26 @@ handle_bottom_half: ALIGN reschedule: pushl $ret_from_sys_call - jmp SYMBOL_NAME(schedule) + jmp SYMBOL_NAME(schedule) # test ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL +#ifdef CONFIG_SMP + ENTER_KERNEL +#endif movl $-ENOSYS,EAX(%esp) cmpl $(NR_syscalls),%eax jae ret_from_sys_call movl SYMBOL_NAME(sys_call_table)(,%eax,4),%eax testl %eax,%eax je ret_from_sys_call - movl SYMBOL_NAME(current),%ebx +#ifdef CONFIG_SMP + GET_PROCESSOR_OFFSET(%edx) + movl SYMBOL_NAME(current_set)(,%edx),%ebx +#else + movl SYMBOL_NAME(current_set),%ebx +#endif andl $~CF_MASK,EFLAGS(%esp) # clear carry - assume no errors movl $0,errno(%ebx) movl %db6,%edx @@ -184,7 +304,12 @@ ENTRY(system_call) movl ORIG_EAX(%esp),%eax call SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value - movl SYMBOL_NAME(current),%eax +#ifdef CONFIG_SMP + GET_PROCESSOR_OFFSET(%eax) + movl SYMBOL_NAME(current_set)(,%eax),%eax +#else + movl SYMBOL_NAME(current_set),%eax +#endif movl errno(%eax),%edx negl %edx 
je 1f @@ -211,7 +336,12 @@ ret_from_sys_call: movl %eax,EFLAGS(%esp) # stupid cmpl $0,SYMBOL_NAME(need_resched) jne reschedule - movl SYMBOL_NAME(current),%eax +#ifdef CONFIG_SMP + GET_PROCESSOR_OFFSET(%eax) + movl SYMBOL_NAME(current_set)(,%eax), %eax +#else + movl SYMBOL_NAME(current_set),%eax +#endif cmpl SYMBOL_NAME(task),%eax # task[0] cannot have signals je 2f cmpl $0,state(%eax) # state @@ -277,8 +407,16 @@ error_code: mov %dx,%es movl $(USER_DS),%edx mov %dx,%fs +#ifdef CONFIG_SMP + ENTER_KERNEL +#endif pushl %eax - movl SYMBOL_NAME(current),%eax +#ifdef CONFIG_SMP + GET_PROCESSOR_OFFSET(%eax) + movl SYMBOL_NAME(current_set)(,%eax), %eax +#else + movl SYMBOL_NAME(current_set),%eax +#endif movl %db6,%edx movl %edx,dbgreg6(%eax) # save current hardware debugging status popl %eax @@ -294,6 +432,9 @@ ENTRY(coprocessor_error) ENTRY(device_not_available) pushl $-1 # mark this as an int SAVE_ALL +#ifdef CONFIG_SMP + ENTER_KERNEL +#endif pushl $ret_from_sys_call movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) @@ -518,4 +659,6 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_select) .long SYMBOL_NAME(sys_flock) .long SYMBOL_NAME(sys_msync) - .space (NR_syscalls-144)*4 + .long SYMBOL_NAME(sys_readv) /* 145 */ + .long SYMBOL_NAME(sys_writev) + .space (NR_syscalls-146)*4 diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 20587602a76e..3dfe6cad76b7 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -9,6 +9,7 @@ */ .text +#include #include #include #include @@ -32,7 +33,22 @@ startup_32: mov %ax,%es mov %ax,%fs mov %ax,%gs +#ifdef CONFIG_SMP + orw %bx,%bx + jz 1f /* Initial CPU cleans BSS */ +/* + * Set up the stack + */ + mov %ax,%ss + xorl %eax,%eax + movw %cx, %ax + movl %eax,%esp + pushl $0 + popfl + jmp checkCPUtype +1: lss stack_start,%esp +#endif CONFIG_SMP /* * Clear BSS first so that there are no surprises... 
*/ @@ -84,6 +100,10 @@ startup_32: rep movsb 1: +#ifdef CONFIG_SMP +checkCPUtype: +#endif + /* check if it is 486 or 386. */ /* * XXX - this does a lot of unnecessary setup. Alignment checks don't @@ -150,7 +170,32 @@ is386: pushl %ecx # restore original EFLAGS orl $2,%eax # set MP 2: movl %eax,%cr0 call check_x87 +#ifdef CONFIG_SMP + movb ready,%eax + orb %eax,%eax + jz 3f + movl $ SYMBOL_NAME(swapper_pg_dir), %eax + movl %eax, %cr3 +#ifdef GAS_KNOWS_CR4 + movl %cr4,%eax + orl $16,%eax + movl %eax,%cr4 +#else + .byte 0x0f,0x20,0xe0 + orl $16,%eax + .byte 0x0f,0x22,0xe0 +#endif + movl %cr0, %eax + orl $0x80000000, %eax + movl %eax, %cr0 + jmp 4f +#endif +3: call setup_paging +#ifdef CONFIG_SMP + incb ready +#endif +4: lgdt gdt_descr lidt idt_descr ljmp $(KERNEL_CS),$1f @@ -159,7 +204,12 @@ is386: pushl %ecx # restore original EFLAGS mov %ax,%es mov %ax,%fs mov %ax,%gs - lss stack_start,%esp +#ifdef CONFIG_SMP + movl $(KERNEL_DS), %eax + mov %ax,%ss # Reload the stack pointer (segment only) +#else + lss stack_start,%esp # Load processor stack +#endif xorl %eax,%eax lldt %ax pushl %eax # These are the parameters to main :-) @@ -171,6 +221,10 @@ L6: jmp L6 # main should never return here, but # just in case, we know what happens. +#ifdef CONFIG_SMP +ready: .byte 0 +#endif + /* * We depend on ET to be correct. This checks for 287/387. 
*/ diff --git a/arch/i386/kernel/hexify.c b/arch/i386/kernel/hexify.c new file mode 100644 index 000000000000..daa331fec26b --- /dev/null +++ b/arch/i386/kernel/hexify.c @@ -0,0 +1,31 @@ +#include + + /* hexify: read bytes from standard input until EOF and print each one as a 0xNN literal, comma separated, eight values per tab-indented row. NOTE(review): the header name after the #include above is missing in this copy of the patch; the getchar/printf calls suggest stdio.h - confirm against the original file. */ +void main() +{ + int c; + int comma=0; /* set once the first value is out, so every later value gets a leading comma */ + int count=0; /* number of values printed on the current row */ + while((c=getchar())!=EOF) + { + unsigned char x=c; /* x is not referenced again in this function */ + if(comma) + printf(","); + else + comma=1; + if(count==8) /* row is full: wrap before printing this value */ + { + count=0; + printf("\n"); + } + if(count==0) + printf("\t"); + printf("0x%02X",c); + count++; + } + if(count) /* finish the last row; count is nonzero once any byte has been printed */ + printf("\n"); + exit(0); +} + + diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 7df807ec2a81..680a1c8a77a6 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -15,6 +15,7 @@ * Naturally it's not a 1:1 relation, but there are similarities. */ +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #define CR0_NE 32 @@ -103,19 +105,29 @@ BUILD_IRQ(SECOND,9,0x02) BUILD_IRQ(SECOND,10,0x04) BUILD_IRQ(SECOND,11,0x08) BUILD_IRQ(SECOND,12,0x10) +#ifdef CONFIG_SMP +BUILD_MSGIRQ(SECOND,13,0x20) +#else BUILD_IRQ(SECOND,13,0x20) +#endif BUILD_IRQ(SECOND,14,0x40) BUILD_IRQ(SECOND,15,0x80) +#ifdef CONFIG_SMP +BUILD_RESCHEDIRQ(16) +#endif /* * Pointers to the low-level handlers: first the general ones, then the * fast ones, then the bad ones. 
*/ -static void (*interrupt[16])(void) = { +static void (*interrupt[17])(void) = { IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt, IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt, IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt, - IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt + IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt +#ifdef CONFIG_SMP + ,IRQ16_interrupt +#endif }; static void (*fast_interrupt[16])(void) = { @@ -169,11 +181,20 @@ int get_irq_list(char *buf) for (i = 0 ; i < 16 ; i++, action++) { if (!action->handler) continue; - len += sprintf(buf+len, "%2d: %8d %c %s\n", + len += sprintf(buf+len, "%3d: %8d %c %s\n", i, kstat.interrupts[i], (action->flags & SA_INTERRUPT) ? '+' : ' ', action->name); } +/* + * Linus - should you add NMI counts here ????? + */ +#ifdef CONFIG_SMP + len+=sprintf(buf+len, "IPI: %8lu received\n", + ipi_count); + len+=sprintf(buf+len, "LCK: %8lu spins\n", + smp_spins); +#endif return len; } @@ -187,6 +208,10 @@ int get_irq_list(char *buf) asmlinkage void do_IRQ(int irq, struct pt_regs * regs) { struct irqaction * action = irq + irq_action; +#ifdef CONFIG_SMP + if(smp_threads_ready && active_kernel_processor!=smp_processor_id()) + panic("IRQ %d: active processor set wrongly(%d not %d).\n", irq, active_kernel_processor, smp_processor_id()); +#endif kstat.interrupts[irq]++; #ifdef CONFIG_RANDOM @@ -204,6 +229,11 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) asmlinkage void do_fast_IRQ(int irq) { struct irqaction * action = irq + irq_action; +#ifdef CONFIG_SMP + /* IRQ 13 is allowed - thats an invalidate */ + if(smp_threads_ready && active_kernel_processor!=smp_processor_id() && irq!=13) + panic("fast_IRQ %d: active processor set wrongly(%d not %d).\n", irq, active_kernel_processor, smp_processor_id()); +#endif kstat.interrupts[irq]++; #ifdef CONFIG_RANDOM @@ -283,6 +313,8 @@ void free_irq(unsigned int irq) restore_flags(flags); } 
+#ifndef CONFIG_SMP + /* * Note that on a 486, we don't want to do a SIGFPE on a irq13 * as the irq is unreliable, and exception 16 works correctly @@ -294,6 +326,8 @@ void free_irq(unsigned int irq) * leads to races. IBM designers who came up with it should * be shot. */ + + static void math_error_irq(int cpl, struct pt_regs *regs) { outb(0,0xF0); @@ -302,6 +336,8 @@ static void math_error_irq(int cpl, struct pt_regs *regs) math_error(); } +#endif + static void no_action(int cpl, struct pt_regs * regs) { } unsigned long probe_irq_on (void) @@ -359,6 +395,10 @@ int probe_irq_off (unsigned long irqs) void init_IRQ(void) { int i; + static unsigned char smptrap=0; + if(smptrap) + return; + smptrap=1; /* set the clock to 100 Hz */ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ @@ -366,10 +406,20 @@ void init_IRQ(void) outb(LATCH >> 8 , 0x40); /* MSB */ for (i = 0; i < 16 ; i++) set_intr_gate(0x20+i,bad_interrupt[i]); + /* This bit is a hack because we don't send timer messages to all processors yet */ + /* It has to here .. 
it doesnt work if you put it down the bottom - assembler explodes 8) */ +#ifdef CONFIG_SMP + set_intr_gate(0x20+i, interrupt[i]); /* IRQ '16' - IPI for rescheduling */ +#endif if (request_irq(2, no_action, SA_INTERRUPT, "cascade")) - printk("Unable to get IRQ2 for cascade\n"); + printk("Unable to get IRQ2 for cascade.\n"); +#ifndef CONFIG_SMP if (request_irq(13,math_error_irq, 0, "math error")) - printk("Unable to get IRQ13 for math-error handler\n"); + printk("Unable to get IRQ13 for math-error handler.\n"); +#else + if (request_irq(13, smp_message_irq, SA_INTERRUPT, "IPI")) + printk("Unable to get IRQ13 for IPI.\n"); +#endif request_region(0x20,0x20,"pic1"); request_region(0xa0,0x20,"pic2"); } diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 02bddb183f17..ba2f2301216b 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -8,6 +8,7 @@ * This file handles the architecture-dependent parts of process handling.. */ +#include #include #include #include @@ -24,6 +25,8 @@ #include #include #include +#include + asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call"); @@ -45,12 +48,40 @@ void enable_hlt(void) asmlinkage int sys_idle(void) { if (current->pid != 0) + { + /* printk("Wrong process idled\n"); SMP bug check */ return -EPERM; - + } +#ifdef CONFIG_SMP + /* + * SMP locking sanity checker + */ + if(smp_processor_id()!=active_kernel_processor) + panic("CPU is %d, kernel CPU is %d in sys_idle!\n", + smp_processor_id(), active_kernel_processor); + if(syscall_count!=1) + printk("sys_idle: syscall count is not 1 (%ld)\n", syscall_count); + if(kernel_counter!=1) + { + printk("CPU %d, sys_idle, kernel_counter is %ld\n", smp_processor_id(), kernel_counter); + if(!kernel_counter) + panic("kernel locking botch"); + } + /* + * Until we have C unlocking done + */ + current->counter = -100; + schedule(); + return 0; +#endif /* endless idle loop with no priority at all */ current->counter = -100; for (;;) { +#ifdef 
CONFIG_SMP + if (cpu_data[smp_processor_id()].hlt_works_ok && !hlt_counter && !need_resched) +#else if (hlt_works_ok && !hlt_counter && !need_resched) +#endif __asm__("hlt"); schedule(); } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 727df0c16de5..ffffbb2d8965 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -26,6 +26,7 @@ #include #include +#include /* * Tell us the machine setup.. @@ -83,6 +84,13 @@ void setup_arch(char **cmdline_p, unsigned long memory_start, memory_end; char c = ' ', *to = command_line, *from = COMMAND_LINE; int len = 0; + static unsigned char smptrap=0; + + if(smptrap==1) + { + return; + } + smptrap=1; ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV); drive_info = DRIVE_INFO; @@ -148,9 +156,10 @@ int get_cpuinfo(char * buffer) { static const char *model[2][9]={{"DX","SX","DX/2","4","SX/2","6", "DX/2-WB","DX/4"}, - {"Pentium 60/66","Pentium 90/100","3", + {"Pentium 60/66","Pentium 75+","3", "4","5","6","7","8"}}; char mask[2]; +#ifndef CONFIG_SMP mask[0] = x86_mask+'@'; mask[1] = '\0'; return sprintf(buffer,"cpu\t\t: %c86\n" @@ -188,4 +197,90 @@ int get_cpuinfo(char * buffer) x86_capability & 256 ? "yes" : "no", loops_per_sec/500000, (loops_per_sec/5000) % 100 ); +#else + char *bp=buffer; + int i; + bp+=sprintf(bp,"cpu\t\t: "); + for(i=0;i<32;i++) + if(cpu_present_map&(1< + * Supported by Caldera http://www.caldera.com. + * Much of the core SMP work is based on previous work by Thomas Radke, to + * whom a great many thanks are extended. + * + * This code is released under the GNU public license version 2 or + * later. 
+ * + * Fixes + * Felix Koop: NR_CPUS used properly + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void *vremap(unsigned long offset, unsigned long size); /* Linus hasnt put this in the headers yet */ + +static int smp_found_config=0; /* Have we found an SMP box */ + +unsigned long cpu_present_map = 0; /* Bitmask of existing CPU's */ +int smp_num_cpus; /* Total count of live CPU's */ +int smp_threads_ready=0; /* Set when the idlers are all forked */ +volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */ +volatile unsigned long smp_invalidate_needed; /* Used for the invalidate map thats also checked in the spinlock */ +struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per cpu bogomips and other parameters */ +static unsigned int num_processors = 0; /* Internal processor count */ +static unsigned long io_apic_addr = 0; /* Address of the I/O apic (not yet used) */ +unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */ +static unsigned char *kstack_base,*kstack_end; /* Kernel stack list pointers */ +static int smp_activated = 0; /* Tripped once we need to start cross invalidating */ +static volatile int smp_commenced=0; /* Tripped when we start scheduling */ +static unsigned char nlong=0; /* Apparent value for boot CPU */ +unsigned char *apic_reg=((unsigned char *)&nlong)-0x20; /* Later set to the vremap() of the APIC */ +unsigned long apic_retval; /* Just debugging the assembler.. 
*/ +unsigned char *kernel_stacks[NR_CPUS]; /* Kernel stack pointers for CPU's (debugging) */ + +static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */ +static volatile unsigned long smp_msg_data; /* IPI data pointer */ +static volatile int smp_src_cpu; /* IPI sender processor */ +static volatile int smp_msg_id; /* Message being sent */ + +volatile unsigned long kernel_flag=0; /* Kernel spinlock */ +volatile unsigned char active_kernel_processor = NO_PROC_ID; /* Processor holding kernel spinlock */ +volatile unsigned long kernel_counter=0; /* Number of times the processor holds the lock */ +volatile unsigned long syscall_count=0; /* Number of times the processor holds the syscall lock */ +volatile unsigned long smp_spins=0; /* Count of cycles wasted to spinning */ + +volatile unsigned long ipi_count; /* Number of IPI's delivered */ + +/* + * Checksum an MP configuration block. + */ + +static int mpf_checksum(unsigned char *mp, int len) +{ + int sum=0; + while(len--) + sum+=*mp++; + return sum&0xFF; +} + +/* + * Processor encoding in an MP configuration block + */ + +static char *mpc_family(int family,int model) +{ + static char n[32]; + static char *model_defs[]= + { + "80486DX","80486DX", + "80486SX","80486DX/2 or 80487", + "80486SL","Intel5X2(tm)", + "Unknown","Unknown", + "80486DX/4" + }; + if(family==0x5) + return("Pentium(tm)"); + if(family==0x0F && model==0x0F) + return("Special controller"); + if(family==0x04 && model<9) + return model_defs[model]; + sprintf(n,"Unknown CPU [%d:%d]",family, model); + return n; +} + +/* + * Read the MPC + */ + +static int smp_read_mpc(struct mp_config_table *mpc) +{ + char str[16]; + int count=sizeof(*mpc); + int apics=0; + unsigned char *mpt=((unsigned char *)mpc)+count; + + if(memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) + { + printk("Bad signature [%c%c%c%c].\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], + mpc->mpc_signature[3]); + return 1; + } + 
if(mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) + { + printk("Checksum error.\n"); + return 1; + } + if(mpc->mpc_spec!=0x01) + { + printk("Unsupported version (%d)\n",mpc->mpc_spec); + return 1; + } + memcpy(str,mpc->mpc_oem,8); + str[8]=0; + printk("OEM ID: %s ",str); + memcpy(str,mpc->mpc_productid,12); + str[12]=0; + printk("Product ID: %s ",str); + printk("APIC at: 0x%lX\n",mpc->mpc_lapic); + + /* + * Now process the configuration blocks. + */ + + while(count<mpc->mpc_length) + { + switch(*mpt) + { + case MP_PROCESSOR: + { + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + if(m->mpc_cpuflag&CPU_ENABLED) + { + printk("Processor #%d %s APIC version %d\n", + m->mpc_apicid, + mpc_family((m->mpc_cpufeature& + CPU_FAMILY_MASK)>>8, + (m->mpc_cpufeature& + CPU_MODEL_MASK)>>4), + m->mpc_apicver); + if(m->mpc_featureflag&(1<<0)) + printk(" Floating point unit present.\n"); + if(m->mpc_featureflag&(1<<7)) + printk(" Machine Exception supported.\n"); + if(m->mpc_featureflag&(1<<8)) + printk(" 64 bit compare & exchange supported.\n"); + if(m->mpc_featureflag&(1<<9)) + printk(" Internal APIC present.\n"); + if(m->mpc_cpuflag&CPU_BOOTPROCESSOR) + { + printk(" Bootup CPU\n"); + boot_cpu_id=m->mpc_apicid; + nlong = boot_cpu_id<<24; /* Dummy 'self' for bootup */ + } + else /* Boot CPU already counted */ + num_processors++; + + if(m->mpc_apicid>NR_CPUS) + printk("Processor #%d unused.
(Max %d processors).\n",m->mpc_apicid, NR_CPUS); + else + cpu_present_map|=(1<<m->mpc_apicid); + } + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m= + (struct mpc_config_bus *)mpt; + memcpy(str,m->mpc_bustype,6); + str[6]=0; + printk("Bus #%d is %s\n", + m->mpc_busid, + str); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_IOAPIC: + { + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + if(m->mpc_flags&MPC_APIC_USABLE) + { + apics++; + printk("I/O APIC #%d Version %d at 0x%lX.\n", + m->mpc_apicid,m->mpc_apicver, + m->mpc_apicaddr); + io_apic_addr = m->mpc_apicaddr; + } + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_INTSRC: + { + struct mpc_config_intsrc *m= + (struct mpc_config_intsrc *)mpt; + + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_LINTSRC: + { + struct mpc_config_intlocal *m= + (struct mpc_config_intlocal *)mpt; + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + } + } + if(apics>1) + printk("Warning: Multiple APIC's not supported.\n"); + return num_processors; +} + +/* + * Scan the memory blocks for an SMP configuration block. + */ + +void smp_scan_config(unsigned long base, unsigned long length) +{ + unsigned long *bp=(unsigned long *)base; + struct intel_mp_floating *mpf; + num_processors = 1; /* The boot processor */ + +/* printk("Scan SMP from %p for %ld bytes.\n", + bp,length);*/ + if(sizeof(*mpf)!=16) + printk("Error: MPF size\n"); + + while(length>0) + { + if(*bp==SMP_MAGIC_IDENT) + { + mpf=(struct intel_mp_floating *)bp; + if(mpf->mpf_length==1 && + !mpf_checksum((unsigned char *)bp,16) && + mpf->mpf_specification==1) + { + printk("Intel multiprocessing (MPv1.1) available.\n"); + if(mpf->mpf_feature2&(1<<7)) + printk(" IMCR and PIC mode supported.\n"); + smp_found_config=1; + /* + * Now see if we need to read further.
+ */ + if(mpf->mpf_feature1!=0) + { + num_processors=2; + printk("I/O APIC at 0xFEC00000.\n"); + printk("Bus#0 is "); + } + switch(mpf->mpf_feature1) + { + case 1: + printk("ISA"); + break; + case 2: + printk("EISA with no IRQ8 chaining"); + break; + case 3: + printk("EISA"); + break; + case 4: + printk("MCA"); + break; + case 5: + printk("ISA\nBus#1 is PCI"); + break; + case 6: + printk("EISA\nBus #1 is PCI"); + break; + case 7: + printk("MCA\nBus #1 is PCI"); + break; + case 0: + break; + default: + printk("???\nUnknown standard configuration %d\n", + mpf->mpf_feature1); + return; + } + if(mpf->mpf_physptr) + smp_read_mpc((void *)mpf->mpf_physptr); + printk("Processors: %d\n", num_processors); + } + } + bp+=4; + length-=16; + } +} + +/* + * Trampoline 80x86 program as an array. + */ + +static unsigned char trampoline_data[]={ +#include "trampoline.hex" +}; + +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure its suitably aligned. + */ + +static void install_trampoline(unsigned char *mp) +{ + memcpy(mp,trampoline_data,sizeof(trampoline_data)); +} + +/* + * We are called very early to get the low memory for the trampoline/kernel stacks + * This has to be done by mm/init.c to parcel us out nice low memory. We allocate + * the kernel stacks at 4K, 8K, 12K... currently (0-03FF is preserved for SMM and + * other things). + */ + +unsigned long smp_alloc_memory(unsigned long mem_base) +{ + int size=(num_processors-1)*PAGE_SIZE; /* Number of stacks needed */ + /* + * Our stacks have to be below the 1Mb line, and mem_base on entry + * is 4K aligned. + */ + + if(mem_base+size>=0x9F000) + panic("smp_alloc_memory: Insufficient low memory for kernel stacks.\n"); + kstack_base=(void *)mem_base; + mem_base+=size; + kstack_end=(void *)mem_base; + return mem_base; +} + +/* + * Hand out stacks one at a time. 
+ */ + +static void *get_kernel_stack(void) +{ + void *stack=kstack_base; + if(kstack_base>=kstack_end) + return NULL; + kstack_base+=PAGE_SIZE; + return stack; +} + + +/* + * The bootstrap kernel entry code has set these up. Save them for + * a given CPU + */ + +void smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c=&cpu_data[id]; + c->hard_math=hard_math; /* Always assumed same currently */ + c->x86=x86; + c->x86_model=x86_model; + c->x86_mask=x86_mask; + c->x86_capability=x86_capability; + c->fdiv_bug=fdiv_bug; + c->wp_works_ok=wp_works_ok; /* Always assumed the same currently */ + c->hlt_works_ok=hlt_works_ok; + c->udelay_val=loops_per_sec; + strcpy(c->x86_vendor_id, x86_vendor_id); +} + +/* + * Architecture specific routine called by the kernel just before init is + * fired off. This allows the BP to have everything in order [we hope]. + * At the end of this all the AP's will hit the system scheduling and off + * we go. Each AP will load the system gdt's and jump through the kernel + * init into idle(). At this point the scheduler will one day take over + * and give them jobs to do. smp_callin is a standard routine + * we use to track CPU's as they power up. + */ + +void smp_commence(void) +{ + /* + * Lets the callin's below out of their loop. + */ + smp_commenced=1; +} + +void smp_callin(void) +{ + int cpuid=GET_APIC_ID(apic_read(APIC_ID)); + unsigned long l; + /* + * Activate our APIC + */ + +/* printk("CALLIN %d\n",smp_processor_id());*/ + l=apic_read(APIC_SPIV); + l|=(1<<8); /* Enable */ + apic_write(APIC_SPIV,l); + sti(); + /* + * Get our bogomips. + */ + calibrate_delay(); + /* + * Save our processor parameters + */ + smp_store_cpu_info(cpuid); + /* + * Allow the master to continue. + */ + set_bit(cpuid, &cpu_callin_map[0]); + /* + * Until we are ready for SMP scheduling + */ + load_ldt(0); +/* printk("Testing faulting...\n"); + *(long *)0=1; OOPS... 
*/ + local_invalidate(); + while(!smp_commenced); + local_invalidate(); +/* printk("Commenced..\n");*/ + + /* This assumes the processor id's are consecutive 0..n-1 - FIXME */ + load_TR(cpuid); +/* while(1);*/ +} + +/* + * Cycle through the processors sending pentium IPI's to boot each. + */ + +void smp_boot_cpus(void) +{ + int i=0; + int cpucount=0; + void *stack; + extern unsigned long init_user_stack[]; + + /* + * Map the local APIC into kernel space + */ + + apic_reg = vremap(0xFEE00000,4096); + + + if(apic_reg == NULL) + panic("Unable to map local apic.\n"); + + /* + * Now scan the cpu present map and fire up anything we find. + */ + + + kernel_stacks[boot_cpu_id]=(void *)init_user_stack; /* Set up for boot processor first */ + + smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */ + + active_kernel_processor=boot_cpu_id; + + for(i=0;i>12); /* Boot on the stack */ + apic_write(APIC_ICR, cfg); /* Kick the second */ + udelay(10); /* Masses of time */ + cfg=apic_read(APIC_ESR); + if(cfg&4) /* Send accept error */ + printk("Processor refused startup request.\n"); + else + { + for(timeout=0;timeout<50000;timeout++) + { + if(cpu_callin_map[0]&(1<utime++; + if (current->pid) + { + if (current->priority < 15) + kstat.cpu_nice++; + else + kstat.cpu_user++; + } + /* Update ITIMER_VIRT for current task if not in a system call */ + if (current->it_virt_value && !(--current->it_virt_value)) { + current->it_virt_value = current->it_virt_incr; + send_sig(SIGVTALRM,current,1); + } + } else { + current->stime++; + if(current->pid) + kstat.cpu_system++; +#ifdef CONFIG_PROFILE + if (prof_buffer && current->pid) { + extern int _stext; + unsigned long eip = regs->eip - (unsigned long) &_stext; + eip >>= CONFIG_PROFILE_SHIFT; + if (eip < prof_len) + prof_buffer[eip]++; + } +#endif + } + /* + * check the cpu time limit on the process. 
+ */ + if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) && + (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max)) + send_sig(SIGKILL, current, 1); + if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) && + (((current->stime + current->utime) % HZ) == 0)) { + unsigned long psecs = (current->stime + current->utime) / HZ; + /* send when equal */ + if (psecs == current->rlim[RLIMIT_CPU].rlim_cur) + send_sig(SIGXCPU, current, 1); + /* and every five seconds thereafter. */ + else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) && + ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0) + send_sig(SIGXCPU, current, 1); + } + + /* Update ITIMER_PROF for the current task */ + if (current->it_prof_value && !(--current->it_prof_value)) { + current->it_prof_value = current->it_prof_incr; + send_sig(SIGPROF,current,1); + } + + + /* + * Don't reschedule if we are in an interrupt... + * [This is test code and not needed in the end] + */ + +/* if(intr_count==1) + {*/ + + /* + * See if the slave processors need a schedule. + */ + + if ( 0 > --current->counter || current->pid == 0) + { + current->counter = 0; + need_resched=1; + } +/* }*/ + + /* + * Clear the IPI + */ + apic_read(APIC_SPIV); /* Dummy read */ + apic_write(APIC_EOI, 0); /* Docs say use 0 for future compatibility */ +} + +/* + * Message call back. + */ + +void smp_message_irq(int cpl, struct pt_regs *regs) +{ + static int n=0; + int i=smp_processor_id(); +/* if(n++%d(%d,%ld)\n",smp_src_cpu,i,smp_msg_id,smp_msg_data);*/ + switch(smp_msg_id) + { + case 0: /* IRQ 13 testing - boring */ + return; + + /* + * A TLB flush is needed. 
+ */ + + case MSG_INVALIDATE_TLB: + if(clear_bit(i,&smp_invalidate_needed)) + local_invalidate(); + set_bit(i, &cpu_callin_map[0]); + cpu_callin_map[0]|=1< last_rtc_update + 660 && - xtime.tv_usec > 500000 - (tick >> 1) && - xtime.tv_usec < 500000 + (tick >> 1)) - if (set_rtc_mmss(xtime.tv_sec) == 0) - last_rtc_update = xtime.tv_sec; - else - last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */ -} + __asm__("divl %2" + :"=a" (quotient), "=d" (remainder) + :"r" (jiffies), + "0" (time_low), "1" (time_high)); -/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. - * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 - * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. - * - * [For the Julian calendar (which was used in Russia before 1917, - * Britain & colonies before 1752, anywhere else before 1582, - * and is still in use by some communities) leave out the - * -year/100+year/400 terms, and add 10.] - * - * This algorithm was first published by Gauss (I think). - * - * WARNING: this function will overflow on 2106-02-07 06:28:16 on - * machines were long is 32-bit! (However, as time_t is signed, we - * will already get problems at other places on 2038-01-19 03:14:08) - */ -static inline unsigned long mktime(unsigned int year, unsigned int mon, - unsigned int day, unsigned int hour, - unsigned int min, unsigned int sec) -{ - if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ - mon += 12; /* Puts Feb last since it has leap day */ - year -= 1; - } - return ((( - (unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) + - year*365 - 719499 - )*24 + hour /* now have hours */ - )*60 + min /* now have minutes */ - )*60 + sec; /* finally seconds */ -} + /* Read the time counter */ + __asm__(".byte 0x0f,0x31" + :"=a" (time_low), "=d" (time_high)); -void time_init(void) -{ - unsigned int year, mon, day, hour, min, sec; - int i; + /* .. 
relative to previous jiffy (32 bits is enough) */ + time_low -= (unsigned long) last_timer_cc; - /* The Linux interpretation of the CMOS clock register contents: - * When the Update-In-Progress (UIP) flag goes from 1 to 0, the - * RTC registers show the second which has precisely just started. - * Let's hope other operating systems interpret the RTC the same way. + /* + * Time offset = (1000000/HZ * remainder) / quotient. */ - /* read RTC exactly on falling edge of update flag */ - for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */ - if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) - break; - for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */ - if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) - break; - do { /* Isn't this overkill ? UIP above should guarantee consistency */ - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - } while (sec != CMOS_READ(RTC_SECONDS)); - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - { - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); - } - if ((year += 1900) < 1970) - year += 100; - xtime.tv_sec = mktime(year, mon, day, hour, min, sec); - xtime.tv_usec = 0; - if (request_irq(TIMER_IRQ, timer_interrupt, 0, "timer") != 0) - panic("Could not allocate timer IRQ!"); + __asm__("mull %1\n\t" + "divl %2" + :"=a" (quotient), "=d" (remainder) + :"r" (quotient), + "0" (time_low), "1" (1000000/HZ)); + + /* + * Due to rounding errors (and jiffies inconsistencies), + * we need to check the result so that we'll get a timer + * that is monotonous. 
+ */ + if (quotient >= 1000000/HZ) + quotient = 1000000/HZ-1; + return quotient; } /* This function must be called with interrupts disabled @@ -160,7 +114,7 @@ void time_init(void) #define TICK_SIZE tick -static inline unsigned long do_gettimeoffset(void) +static unsigned long do_slow_gettimeoffset(void) { int count; unsigned long offset = 0; @@ -181,6 +135,8 @@ static inline unsigned long do_gettimeoffset(void) return offset + count; } +static unsigned long (*do_gettimeoffset)(void) = do_slow_gettimeoffset; + /* * This version of gettimeofday has near microsecond resolution. */ @@ -280,3 +236,133 @@ static int set_rtc_mmss(unsigned long nowtime) return retval; } + +/* last time the cmos clock got updated */ +static long last_rtc_update = 0; + +/* + * timer_interrupt() needs to keep up the real-time clock, + * as well as call the "do_timer()" routine every clocktick + */ +static inline void timer_interrupt(int irq, struct pt_regs * regs) +{ + do_timer(regs); + + /* + * If we have an externally synchronized Linux clock, then update + * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be + * called as close as possible to 500 ms before the new second starts. + */ + if (time_state != TIME_BAD && xtime.tv_sec > last_rtc_update + 660 && + xtime.tv_usec > 500000 - (tick >> 1) && + xtime.tv_usec < 500000 + (tick >> 1)) + if (set_rtc_mmss(xtime.tv_sec) == 0) + last_rtc_update = xtime.tv_sec; + else + last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */ + /* As we return to user mode fire off the other CPU schedulers.. this is + basically because we don't yet share IRQ's around. This message is + rigged to be safe on the 386 - basically its a hack, so don't look + closely for now.. */ + smp_message_pass(MSG_ALL_BUT_SELF, MSG_RESCHEDULE, 0L, 0); + +} + +/* + * This is the same as the above, except we _also_ save the current + * cycle counter value at the time of the timer interrupt, so that + * we later on can estimate the time of day more exactly. 
+ */ +static void pentium_timer_interrupt(int irq, struct pt_regs * regs) +{ + /* read Pentium cycle counter */ + __asm__(".byte 0x0f,0x31" + :"=a" (((unsigned long *) &last_timer_cc)[0]), + "=d" (((unsigned long *) &last_timer_cc)[1])); + timer_interrupt(irq, regs); +} + +/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. + * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! (However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +static inline unsigned long mktime(unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + return ((( + (unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +void time_init(void) +{ + void (*irq_handler)(int, struct pt_regs *); + unsigned int year, mon, day, hour, min, sec; + int i; + + /* The Linux interpretation of the CMOS clock register contents: + * When the Update-In-Progress (UIP) flag goes from 1 to 0, the + * RTC registers show the second which has precisely just started. + * Let's hope other operating systems interpret the RTC the same way. + */ + /* read RTC exactly on falling edge of update flag */ + for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... 
*/ + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) + break; + for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */ + if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) + break; + do { /* Isn't this overkill ? UIP above should guarantee consistency */ + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + } while (sec != CMOS_READ(RTC_SECONDS)); + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + if ((year += 1900) < 1970) + year += 100; + xtime.tv_sec = mktime(year, mon, day, hour, min, sec); + xtime.tv_usec = 0; + + /* If we have the CPU hardware time counters, use them */ + irq_handler = timer_interrupt; + if (x86_capability & 16) { + irq_handler = pentium_timer_interrupt; + do_gettimeoffset = do_fast_gettimeoffset; + /* read Pentium cycle counter */ + __asm__(".byte 0x0f,0x31" + :"=a" (((unsigned long *) &init_timer_cc)[0]), + "=d" (((unsigned long *) &init_timer_cc)[1])); + } + if (request_irq(TIMER_IRQ, irq_handler, 0, "timer") != 0) + panic("Could not allocate timer IRQ!"); +} diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S new file mode 100644 index 000000000000..8eb8c921f98a --- /dev/null +++ b/arch/i386/kernel/trampoline.S @@ -0,0 +1,75 @@ +! +! Trampoline.S Derived from Setup.S by Linus Torvalds +! +! Entry: CS:IP point to the start of our code, we are +! in real mode with no stack, but the rest of the +! trampoline page to make our stack and everything else +! is a mystery. +! +! In fact we don't actually need a stack so we don't +! set one up. +! +! We jump into the boot/compressed/head.S code. So you'd +! better be running a compressed kernel image or you +! won't get very far. +! 
+#define __ASSEMBLY__ +#include +#include + +.text + extrn startup32 + +entry start +start: +! nop +! jmp start ! Test + mov ax,cs ! Code and data in the same place + mov ds,ax ! + mov cx,ax ! Pass stack info to the 32bit boot + add cx,cx + add cx,cx + add cx,cx + add cx,cx ! Segment -> Offset + add cx, #4096 ! End of page is wanted + mov bx,#1 ! Flag an SMP trampoline + cli ! We should be safe anyway + + lidt idt_48 ! load idt with 0,0 + lgdt gdt_48 ! load gdt with whatever is appropriate + + xor ax,ax + inc ax ! protected mode (PE) bit + lmsw ax ! Into protected mode + jmp flush_instr +flush_instr: + jmpi 8192+startup32,KERNEL_CS ! Jump to the 32bit trampoline code +! jmpi 0x100000,KERNEL_CS ! Jump into the 32bit startup +! .byte 0x66,0x67 ! 32bit +! .byte 0xea,0x00,0x00,0x10,0x00,0x10,0x00 !jmpi .0x100000,KERNEL_CS + +gdt: + .word 0,0,0,0 ! dummy + + .word 0,0,0,0 ! unused + + .word 0x07FF ! 8Mb - limit=2047 (2048*4096=8Mb) + .word 0x0000 ! base address=0 + .word 0x9A00 ! code read/exec + .word 0x00C0 ! granularity=4096, 386 + + .word 0x07FF ! 8Mb - limit=2047 (2048*4096=8Mb) + .word 0x0000 ! base address=0 + .word 0x9200 ! data read/write + .word 0x00C0 ! granularity=4096, 386 + +idt_48: + .word 0 ! idt limit=0 + .word 0,0 ! idt base=0L + +gdt_48: + .word 0x800 ! gdt limit=2048, 256 GDT entries + .word 8192+gdt,0x0 ! gdt base = 8192+gdt (first SMP CPU) + ! we load the others with the first table + ! saves rewriting gdt_48 for each + diff --git a/arch/i386/kernel/trampoline32.S b/arch/i386/kernel/trampoline32.S new file mode 100644 index 000000000000..e33aac6b7572 --- /dev/null +++ b/arch/i386/kernel/trampoline32.S @@ -0,0 +1,21 @@ +! +! 32bit side of the trampoline code +! +#define __ASSEMBLY__ +#include +#include +! +! +! Anything but a relative address here will be wrong by 8K... +! + .globl startup32 +.text +startup32: +! 
Run the kernel + mov eax,#KERNEL_DS + mov ds,ax + mov eax,#0xA5A5A5A5 + mov [8192],eax + jmpi 0x100000,KERNEL_CS +l1: + .byte 0xEA,0x00,0x00,0x10,0x00,0x10,0x00 diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index ab57e2c70a42..70474fdf6e32 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -10,6 +10,7 @@ * to mainly kill the offending process (probably by giving it a signal, * but possibly by killing it outright if necessary). */ +#include #include #include #include @@ -111,6 +112,7 @@ int kstack_depth_to_print = 24; } console_verbose(); printk("%s: %04lx\n", str, err & 0xffff); + printk("CPU: %d\n", smp_processor_id()); printk("EIP: %04x:%08lx\nEFLAGS: %08lx\n", 0xffff & regs->cs,regs->eip,regs->eflags); printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->eax, regs->ebx, regs->ecx, regs->edx); @@ -190,11 +192,15 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) asmlinkage void do_nmi(struct pt_regs * regs, long error_code) { +#ifdef CONFIG_SMP_NMI_INVAL + smp_invalidate_rcv(); +#else #ifndef CONFIG_IGNORE_NMI printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); printk("You probably have a hardware problem with your RAM chips or a\n"); printk("power saving mode enabled.\n"); #endif +#endif } asmlinkage void do_debug(struct pt_regs * regs, long error_code) @@ -236,7 +242,19 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code) void math_error(void) { struct i387_hard_struct * env; - +#ifdef CONFIG_SMP + env=&current->tss.i387.hard; + send_sig(SIGFPE, current, 1); + /* + * Save the info for the exception handler + */ + __asm__ __volatile__("fnsave %0":"=m" (*env)); + current->flags&=~PF_USEDFPU; + /* + * Cause a trap if they use the FPU again.
+ */ + stts(); +#else clts(); if (!last_task_used_math) { __asm__("fnclex"); @@ -253,6 +271,7 @@ void math_error(void) env->fos = env->twd; env->swd &= 0xffff3800; env->twd = 0xffffffff; +#endif } asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) @@ -270,6 +289,30 @@ asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) */ asmlinkage void math_state_restore(void) { +#ifdef CONFIG_SMP +/* + * SMP is actually simpler than uniprocessor for once. Because + * we can't pull the delayed FPU switching trick Linus does + * we simply have to do the restore each context switch and + * set the flag. switch_to() will always save the state in + * case we swap processors. We also don't use the coprocessor + * timer - IRQ 13 mode isnt used with SMP machines (thank god). + * + * If this actually works it will be a miracle however + */ + __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ + if(current->used_math) + __asm__("frstor %0": :"m" (current->tss.i387)); + else + { + /* + * Our first FPU usage, clean the chip. + */ + __asm__("fninit"); + current->used_math = 1; + } + current->flags|=PF_USEDFPU; /* So we fnsave on switch_to() */ +#else __asm__ __volatile__("clts"); if (last_task_used_math == current) return; @@ -287,6 +330,7 @@ asmlinkage void math_state_restore(void) current->used_math=1; } timer_active &= ~(1< #include #include +#include #include #include @@ -111,10 +112,27 @@ unsigned long paging_init(unsigned long start_mem, unsigned long end_mem) * and SMM (for laptops with [34]86/SL chips) may need it. It is read * and write protected to detect null pointer references in the * kernel. + * It may also hold the MP configuration table when we are booting SMP. */ #if 0 memset((void *) 0, 0, PAGE_SIZE); #endif +#ifdef CONFIG_SMP + smp_scan_config(0x0,0x400); /* Scan the bottom 1K for a signature */ + /* + * FIXME: Linux assumes you have 640K of base ram.. this continues + * the error... 
+ */ + smp_scan_config(639*0x400,0x400); /* Scan the top 1K of base RAM */ + smp_scan_config(0xF0000,0x10000); /* Scan the 64K of bios */ + /* + * If it is an SMP machine we should know now, unless the configuration + * is in an EISA/MCA bus machine with an extended bios data area. I don't + * have such a machine so someone else can fill in the check of the EBDA + * here. + */ +/* smp_alloc_memory(8192); */ +#endif #ifdef CONFIG_TEST_VERIFY_AREA wp_works_ok = 0; #endif @@ -123,6 +141,7 @@ unsigned long paging_init(unsigned long start_mem, unsigned long end_mem) pg_dir = swapper_pg_dir; while (address < end_mem) { #ifdef CONFIG_PENTIUM_MM +#ifndef CONFIG_SMP if (address <= end_mem + 4*1024*1024 && (x86_capability & 8)) { #ifdef GAS_KNOWS_CR4 @@ -143,6 +162,7 @@ unsigned long paging_init(unsigned long start_mem, unsigned long end_mem) address += 4*1024*1024; continue; } +#endif #endif /* map the memory at virtual addr 0xC0000000 */ pg_table = (pte_t *) (PAGE_MASK & pgd_val(pg_dir[768])); @@ -184,6 +204,14 @@ void mem_init(unsigned long start_mem, unsigned long end_mem) /* mark usable pages in the mem_map[] */ start_low_mem = PAGE_ALIGN(start_low_mem); + +#ifdef CONFIG_SMP + /* + * But first pinch a few for the stack/trampoline stuff + */ + start_low_mem += PAGE_SIZE; /* 32bit startup code */ + start_low_mem = smp_alloc_memory(start_low_mem); /* AP processor stacks */ +#endif start_mem = PAGE_ALIGN(start_mem); /* diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 5cfa29869eca..291e4bc5f45d 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -88,7 +88,7 @@ static void extended_partition(struct gendisk *hd, kdev_t dev) this_sector = first_sector; while (1) { - if ((current_minor & mask) >= hd->max_p) + if ((current_minor & mask) == 0) return; if (!(bh = bread(dev,0,1024))) return; @@ -134,7 +134,7 @@ static void extended_partition(struct gendisk *hd, kdev_t dev) add_partition(hd, current_minor, this_sector+p->start_sect, p->nr_sects); 
current_minor++; - if ((current_minor & mask) >= hd->max_p) + if ((current_minor & mask) == 0) goto done; } /* @@ -254,8 +254,6 @@ check_table: if (!p->nr_sects) continue; add_partition(hd, minor, first_sector+p->start_sect, p->nr_sects); - if ((current_minor & 0x3f) >= 60) - continue; if (p->sys_ind == EXTENDED_PARTITION) { printk(" <"); /* @@ -280,7 +278,7 @@ check_table: p = (struct partition *) (0x1be + data); for (i = 4 ; i < 16 ; i++, current_minor++) { p--; - if ((current_minor & mask) >= mask-2) + if ((current_minor & mask) == 0) break; if (!(p->start_sect && p->nr_sects)) continue; @@ -299,6 +297,7 @@ check_table: static int osf_partition(struct gendisk *hd, unsigned int dev, unsigned long first_sector) { int i; + int mask = (1 << hd->minor_shift) - 1; struct buffer_head *bh; struct disklabel { u32 d_magic; @@ -351,6 +350,8 @@ static int osf_partition(struct gendisk *hd, unsigned int dev, unsigned long fir return 0; } for (i = 0 ; i < label->d_npartitions; i++, partition++) { + if ((current_minor & mask) == 0) + break; if (partition->p_size) add_partition(hd, current_minor, first_sector+partition->p_offset, diff --git a/drivers/block/ide.c b/drivers/block/ide.c index c1caf7fac353..67bb831793f5 100644 --- a/drivers/block/ide.c +++ b/drivers/block/ide.c @@ -149,10 +149,9 @@ * Driver compile-time options are in ide.h * * To do, in likely order of completion: - * - figure out why Mitsumi ATAPI cdroms are having trouble.. 
* - add ioctls to get/set interface timings on cmd640, ht6560b, triton * - modify kernel to obtain BIOS geometry for drives on 2nd/3rd/4th i/f - * - improved CMD support: probably handing this off to someone else + * - improved CMD support: handed this off to someone else * - find someone to work on IDE *tape drive* support */ diff --git a/drivers/block/triton.c b/drivers/block/triton.c index 3d29a5cdb9c6..7067e820099e 100644 --- a/drivers/block/triton.c +++ b/drivers/block/triton.c @@ -44,8 +44,14 @@ * Testing was done with an ASUS P55TP4XE/100 system and the following drives: * * Quantum Fireball 1080A (1Gig w/83kB buffer), DMA mode2, PIO mode4. - * - DMA mode2 works fine (7.4MB/sec), despite the tiny on-drive buffer. + * - DMA mode2 works well (7.4MB/sec), despite the tiny on-drive buffer. * - This drive also does PIO mode4, at about the same speed as DMA mode2. + * An awesome drive for the price! + * + * Fujitsu M1606TA (1Gig w/256kB buffer), DMA mode2, PIO mode4. + * - DMA mode2 gives horrible performance (1.6MB/sec), despite the good + * size of the on-drive buffer and a boasted 10ms average access time. + * - PIO mode4 was better, but peaked at a mere 4.5MB/sec. * * Micropolis MC2112A (1Gig w/508kB buffer), drive pre-dates EIDE and ATA2. * - DMA works fine (2.2MB/sec), probably due to the large on-drive buffer. 
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index 81d837bbac63..611608f6b32b 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1186,6 +1186,11 @@ unsigned long kbd_init(unsigned long kmem_start) request_irq(KEYBOARD_IRQ, keyboard_interrupt, 0, "keyboard"); request_region(0x60,16,"kbd"); #ifdef __alpha__ + /* if there is an input byte left, eat it up: */ + if (inb(0x64) & 0x01) { + inb(0x60); + } + /* enable keyboard interrupts, PC/AT mode */ kb_wait(); outb(0x60,0x64); /* write PS/2 Mode Register */ diff --git a/drivers/char/mem.c b/drivers/char/mem.c index eca107b838ed..78583ddc6094 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -26,7 +26,7 @@ extern long soundcard_init(long mem_start); #endif -static int read_ram(struct inode * inode, struct file * file,char * buf, int count) +static int read_ram(struct inode * inode, struct file * file, char * buf, int count) { return -EIO; } @@ -36,11 +36,12 @@ static int write_ram(struct inode * inode, struct file * file, const char * buf, return -EIO; } -static int read_mem(struct inode * inode, struct file * file,char * buf, int count) +static int read_mem(struct inode * inode, struct file * file, char * buf, int count) { unsigned long p = file->f_pos; int read; + p += PAGE_OFFSET; if (count < 0) return -EINVAL; if (MAP_NR(p) >= MAP_NR(high_memory)) @@ -48,24 +49,27 @@ static int read_mem(struct inode * inode, struct file * file,char * buf, int cou if (count > high_memory - p) count = high_memory - p; read = 0; - while (p < PAGE_SIZE && count > 0) { +#if defined(__i386__) /* we don't have page 0 mapped on x86.. 
*/ + while (p < PAGE_OFFSET + PAGE_SIZE && count > 0) { put_user(0,buf); buf++; p++; count--; read++; } - memcpy_tofs(buf,(void *) p,count); +#endif + memcpy_tofs(buf, (void *) p, count); read += count; file->f_pos += read; return read; } -static int write_mem(struct inode * inode, struct file * file,const char * buf, int count) +static int write_mem(struct inode * inode, struct file * file, const char * buf, int count) { unsigned long p = file->f_pos; int written; + p += PAGE_OFFSET; if (count < 0) return -EINVAL; if (MAP_NR(p) >= MAP_NR(high_memory)) @@ -73,14 +77,16 @@ static int write_mem(struct inode * inode, struct file * file,const char * buf, if (count > high_memory - p) count = high_memory - p; written = 0; - while (p < PAGE_SIZE && count > 0) { +#if defined(__i386__) /* we don't have page 0 mapped on x86.. */ + while (PAGE_OFFSET + p < PAGE_SIZE && count > 0) { /* Hmm. Do something? */ buf++; p++; count--; written++; } - memcpy_fromfs((void *) p,buf,count); +#endif + memcpy_fromfs((void *) p, buf, count); written += count; file->f_pos += written; return count; @@ -121,7 +127,7 @@ static int read_kmem(struct inode *inode, struct file *file, char *buf, int coun return read1 + read2; } -static int read_port(struct inode * inode,struct file * file,char * buf, int count) +static int read_port(struct inode * inode, struct file * file,char * buf, int count) { unsigned int i = file->f_pos; char * tmp = buf; @@ -135,7 +141,7 @@ static int read_port(struct inode * inode,struct file * file,char * buf, int cou return tmp-buf; } -static int write_port(struct inode * inode,struct file * file,const char * buf, int count) +static int write_port(struct inode * inode, struct file * file, const char * buf, int count) { unsigned int i = file->f_pos; const char * tmp = buf; @@ -149,17 +155,17 @@ static int write_port(struct inode * inode,struct file * file,const char * buf, return tmp-buf; } -static int read_null(struct inode * node,struct file * file,char * buf,int count) 
+static int read_null(struct inode * node, struct file * file, char * buf, int count) { return 0; } -static int write_null(struct inode * inode,struct file * file, const char * buf, int count) +static int write_null(struct inode * inode, struct file * file, const char * buf, int count) { return count; } -static int read_zero(struct inode * node,struct file * file,char * buf,int count) +static int read_zero(struct inode * node, struct file * file, char * buf, int count) { int left; @@ -179,12 +185,12 @@ static int mmap_zero(struct inode * inode, struct file * file, struct vm_area_st return 0; } -static int read_full(struct inode * node,struct file * file,char * buf,int count) +static int read_full(struct inode * node, struct file * file, char * buf,int count) { return count; } -static int write_full(struct inode * inode,struct file * file, const char * buf, int count) +static int write_full(struct inode * inode, struct file * file, const char * buf, int count) { return -ENOSPC; } @@ -199,7 +205,7 @@ static int null_lseek(struct inode * inode, struct file * file, off_t offset, in return file->f_pos=0; } /* - * The memory devices use the full 32 bits of the offset, and so we cannot + * The memory devices use the full 32/64 bits of the offset, and so we cannot * check against negative addresses: they are ok. The return value is weird, * though, in that case (0). 
* diff --git a/drivers/char/msbusmouse.c b/drivers/char/msbusmouse.c index c19c103f1c66..0ccb72a8b17e 100644 --- a/drivers/char/msbusmouse.c +++ b/drivers/char/msbusmouse.c @@ -39,6 +39,7 @@ #endif #include +#include #include #include #include @@ -188,6 +189,10 @@ unsigned long ms_bus_mouse_init(unsigned long kmem_start) mouse.buttons = 0x80; mouse.dx = mouse.dy = 0; mouse.wait = NULL; + + if (check_region(MS_MSE_CONTROL_PORT, 0x04)) + return -ENODEV; + if (inb_p(MS_MSE_SIGNATURE_PORT) == 0xde) { mse_byte = inb_p(MS_MSE_SIGNATURE_PORT); @@ -210,6 +215,7 @@ unsigned long ms_bus_mouse_init(unsigned long kmem_start) #endif } MS_MSE_INT_OFF(); + request_region(MS_MSE_CONTROL_PORT, 0x04, "MS Busmouse"); printk("Microsoft BusMouse detected and installed.\n"); mouse_register(&ms_bus_mouse); #ifdef MODULE @@ -224,7 +230,10 @@ void cleanup_module(void) { if (MOD_IN_USE) printk("msbusmouse: in use, remove delayed\n"); - mouse_deregister(&ms_bus_mouse); + else { + mouse_deregister(&ms_bus_mouse); + release_region(MS_MSE_CONTROL_PORT, 0x04); + } } #endif diff --git a/drivers/char/psaux.c b/drivers/char/psaux.c index 05d5f01d763f..77719f8aaaa9 100644 --- a/drivers/char/psaux.c +++ b/drivers/char/psaux.c @@ -499,7 +499,6 @@ unsigned long psaux_init(unsigned long kmem_start) printk("PS/2 auxiliary pointing device detected -- driver installed.\n"); aux_present = 1; kbd_read_mask = AUX_OBUF_FULL; - mouse_register(&psaux_mouse); } else { #ifdef MODULE return -EIO; @@ -507,6 +506,7 @@ unsigned long psaux_init(unsigned long kmem_start) return kmem_start; /* No mouse at all */ #endif } + mouse_register(&psaux_mouse); queue = (struct aux_queue *) kmem_start; kmem_start += sizeof (struct aux_queue); memset(queue, 0, sizeof(*queue)); diff --git a/drivers/char/random.c b/drivers/char/random.c index ddf3d238cf14..992b4f04655f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -426,16 +426,19 @@ static void add_timer_randomness(struct random_bucket *r, #if defined (__i386__) 
/* - * On a 386, read the high resolution timer. We assume that - * this gives us 2 bits of randomness. XXX This needs + * On a Pentium, read the cycle counter. We assume that + * this gives us 8 bits of randomness. XXX This needs * investigation. - */ - outb_p(0x00, 0x43); /* latch the count ASAP */ - add_entropy_byte(r, inb_p(0x40), 1); - add_entropy_byte(r, inb(0x40), 1); - r->entropy_count += 2; - if (r->entropy_count > r->bit_length) - r->entropy_count = r->bit_length; + */ + if (x86_capability & 16) { + unsigned long low, high; + __asm__(".byte 0x0f,0x31" + :"=a" (low), "=d" (high)); + add_entropy_byte(r, low, 1); + r->entropy_count += 8; + if (r->entropy_count > r->bit_length) + r->entropy_count = r->bit_length; + } #endif } diff --git a/drivers/net/3c501.c b/drivers/net/3c501.c index b3afd64de9ce..8b2956d0b7da 100644 --- a/drivers/net/3c501.c +++ b/drivers/net/3c501.c @@ -654,6 +654,7 @@ el1_get_stats(struct device *dev) } /* Set or clear the multicast filter for this adaptor. + num_addrs == -2 All multicast hosts num_addrs == -1 Promiscuous mode, receive all packets num_addrs == 0 Normal mode, clear multicast list num_addrs > 0 Multicast mode, receive normal and MC packets, and do @@ -664,8 +665,8 @@ set_multicast_list(struct device *dev, int num_addrs, void *addrs) { int ioaddr = dev->base_addr; - if (num_addrs > 0) { - outb(RX_MULT, RX_CMD); + if (num_addrs > 0 || num_addrs==-2) { + outb(RX_MULT, RX_CMD); /* Multicast or all multicast is the same */ inb(RX_STATUS); /* Clear status. 
*/ } else if (num_addrs < 0) { outb(RX_PROM, RX_CMD); diff --git a/drivers/net/3c503.c b/drivers/net/3c503.c index 0b602150a9f9..144fc012e637 100644 --- a/drivers/net/3c503.c +++ b/drivers/net/3c503.c @@ -71,8 +71,10 @@ static void el2_reset_8390(struct device *dev); static void el2_init_card(struct device *dev); static void el2_block_output(struct device *dev, int count, const unsigned char *buf, const start_page); -static int el2_block_input(struct device *dev, int count, char *buf, +static void el2_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset); +static void el2_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); /* This routine probes for a memory-mapped 3c503 board by looking for @@ -148,6 +150,7 @@ el2_probe1(struct device *dev, int ioaddr) { int i, iobase_reg, membase_reg, saved_406; static unsigned version_printed = 0; + unsigned long vendor_id; /* Reset and/or avoid any lurking NE2000 */ if (inb(ioaddr + 0x408) == 0xff) { @@ -167,11 +170,11 @@ el2_probe1(struct device *dev, int ioaddr) saved_406 = inb_p(ioaddr + 0x406); outb_p(ECNTRL_RESET|ECNTRL_THIN, ioaddr + 0x406); /* Reset it... */ outb_p(ECNTRL_THIN, ioaddr + 0x406); - /* Map the station addr PROM into the lower I/O ports. */ + /* Map the station addr PROM into the lower I/O ports. We now check + for both the old and new 3Com prefix */ outb(ECNTRL_SAPROM|ECNTRL_THIN, ioaddr + 0x406); - if ( inb(ioaddr + 0) != 0x02 - || inb(ioaddr + 1) != 0x60 - || inb(ioaddr + 2) != 0x8c) { + vendor_id = inb(ioaddr)*0x10000 + inb(ioaddr + 1)*0x100 + inb(ioaddr + 2); + if ((vendor_id != OLD_3COM_ID) && (vendor_id != NEW_3COM_ID)) { /* Restore the register we frobbed. 
*/ outb(saved_406, ioaddr + 0x406); return ENODEV; @@ -250,6 +253,7 @@ el2_probe1(struct device *dev, int ioaddr) ei_status.rx_start_page = EL2SM_START_PG + TX_PAGES; ei_status.stop_page = EL2SM_STOP_PG; ei_status.reset_8390 = &el2_reset_8390; + ei_status.get_8390_hdr = &el2_get_8390_hdr; ei_status.block_input = &el2_block_input; ei_status.block_output = &el2_block_output; @@ -268,7 +272,7 @@ el2_probe1(struct device *dev, int ioaddr) dev->stop = &el2_close; if (dev->mem_start) - printk("\n%s: %s with shared memory at %#6lx-%#6lx,\n", + printk("\n%s: %s with shared memory at %#6lx-%#6lx.\n", dev->name, ei_status.name, dev->mem_start, dev->mem_end-1); else printk("\n%s: %s using programmed I/O (REJUMPER for SHARED MEMORY).\n", @@ -388,12 +392,9 @@ el2_block_output(struct device *dev, int count, outb(EGACFR_NORM, E33G_GACFR); /* Enable RAM and interrupts. */ if (dev->mem_start) { /* Shared memory transfer */ - void *dest_addr = (void *)(dev->mem_start + - ((start_page - ei_status.tx_start_page) << 8)); - memcpy(dest_addr, buf, count); - if (ei_debug > 2 && memcmp(dest_addr, buf, count)) - printk("%s: 3c503 send_packet() bad memory copy @ %#5x.\n", - dev->name, (int) dest_addr); + unsigned long dest_addr = dev->mem_start + + ((start_page - ei_status.tx_start_page) << 8); + memcpy_toio(dest_addr, buf, count); return; } /* No shared memory, put the packet out the slow way. */ @@ -420,9 +421,34 @@ el2_block_output(struct device *dev, int count, return; } +/* Read the 4 byte, page aligned 8390 specific header. */ +static void +el2_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + unsigned int i; + unsigned long hdr_start = dev->mem_start + ((ring_page - EL2SM_START_PG)<<8); + + if (dev->mem_start) { /* Use the shared memory. */ + memcpy_fromio(hdr, hdr_start, sizeof(struct e8390_pkt_hdr)); + return; + } + + /* No shared memory, use programmed I/O. Ugh. 
*/ + outb(0, E33G_DMAAL); + outb_p(ring_page & 0xff, E33G_DMAAH); + outb_p((ei_status.interface_num == 0 ? ECNTRL_THIN : ECNTRL_AUI) | ECNTRL_INPUT + | ECNTRL_START, E33G_CNTRL); + + /* Header is less than 8 bytes, so we can ignore the FIFO. */ + for(i = 0; i < sizeof(struct e8390_pkt_hdr); i++) + ((char *)(hdr))[i] = inb_p(E33G_FIFOH); + + outb_p(ei_status.interface_num == 0 ? ECNTRL_THIN : ECNTRL_AUI, E33G_CNTRL); +} + /* Returns the new ring pointer. */ -static int -el2_block_input(struct device *dev, int count, char *buf, int ring_offset) +static void +el2_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { int boguscount = 0; int end_of_ring = dev->rmem_end; @@ -434,13 +460,14 @@ el2_block_input(struct device *dev, int count, char *buf, int ring_offset) if (dev->mem_start + ring_offset + count > end_of_ring) { /* We must wrap the input move. */ int semi_count = end_of_ring - (dev->mem_start + ring_offset); - memcpy(buf, (char *)dev->mem_start + ring_offset, semi_count); + memcpy_fromio(skb->data, dev->mem_start + ring_offset, semi_count); count -= semi_count; - memcpy(buf + semi_count, (char *)dev->rmem_start, count); - return dev->rmem_start + count; + memcpy_fromio(skb->data + semi_count, dev->rmem_start, count); + } else { + /* Packet is in one chunk -- we can copy + cksum. */ + eth_io_copy_and_sum(skb, dev->mem_start + ring_offset, count, 0); } - memcpy(buf, (char *)dev->mem_start + ring_offset, count); - return ring_offset + count; + return; } /* No shared memory, use programmed I/O. */ outb(ring_offset & 0xff, E33G_DMAAL); @@ -459,10 +486,9 @@ el2_block_input(struct device *dev, int count, char *buf, int ring_offset) boguscount = 0; break; } - buf[i] = inb_p(E33G_FIFOH); + (skb->data)[i] = inb_p(E33G_FIFOH); } outb_p(ei_status.interface_num == 0 ? 
ECNTRL_THIN : ECNTRL_AUI, E33G_CNTRL); - return 0; } #ifdef MODULE char kernel_version[] = UTS_RELEASE; diff --git a/drivers/net/3c503.h b/drivers/net/3c503.h index f469a92c3abd..d40af25e2c70 100644 --- a/drivers/net/3c503.h +++ b/drivers/net/3c503.h @@ -6,6 +6,12 @@ #define EL2H (dev->base_addr + 0x400) #define EL2L (dev->base_addr) +/* Vendor unique hardware addr. prefix. 3Com has 2 because they ran + out of available addresses on the first one... */ + +#define OLD_3COM_ID 0x02608c +#define NEW_3COM_ID 0x0020af + /* Shared memory management parameters */ #define EL2SM_START_PG (0x20) /* First page of TX buffer */ diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 6e9a13a7f390..c1f6d6ff820e 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -262,7 +262,7 @@ int el3_probe(struct device *dev) dev->stop = &el3_close; dev->get_stats = &el3_get_stats; #ifdef HAVE_MULTICAST - dev->set_multicast_list = &set_multicast_list; + dev->set_multicast_list = &set_multicast_list; #endif /* Fill in the generic fields of the device structure. */ @@ -652,7 +652,7 @@ set_multicast_list(struct device *dev, int num_addrs, void *addrs) printk("%s: Setting Rx mode to %d addresses.\n", dev->name, num_addrs); } } - if (num_addrs > 0) { + if (num_addrs > 0 || num_addrs == -2) { outw(SetRxFilter|RxStation|RxMulticast|RxBroadcast, ioaddr + EL3_CMD); } else if (num_addrs < 0) { outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm, diff --git a/drivers/net/8390.c b/drivers/net/8390.c index 44403377c0f6..a26b530ee590 100644 --- a/drivers/net/8390.c +++ b/drivers/net/8390.c @@ -19,6 +19,8 @@ Changelog: Paul Gortmaker : remove set_bit lock, other cleanups. + Paul Gortmaker : add ei_get_8390_hdr() so we can pass skb's to + ei_block_input() for eth_io_copy_and_sum(). */ @@ -70,15 +72,18 @@ static const char *version = int start_page) Write the COUNT bytes of BUF to the packet buffer at START_PAGE. The "page" value uses the 8390's 256-byte pages. 
- int block_input(struct device *dev, int count, char *buf, int ring_offset) - Read COUNT bytes from the packet buffer into BUF. Start reading from - RING_OFFSET, the address as the 8390 sees it. The first read will - always be the 4 byte, page aligned 8390 header. *If* there is a + void get_8390_hdr(struct device *dev, struct e8390_hdr *hdr, int ring_page) + Read the 4 byte, page aligned 8390 header. *If* there is a subsequent read, it will be of the rest of the packet. + void block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) + Read COUNT bytes from the packet buffer into the skb data area. Start + reading from RING_OFFSET, the address as the 8390 sees it. This will always + follow the read of the 8390 header. */ #define ei_reset_8390 (ei_local->reset_8390) #define ei_block_output (ei_local->block_output) #define ei_block_input (ei_local->block_input) +#define ei_get_8390_hdr (ei_local->get_8390_hdr) /* use 0 for production, 1 for verification, >2 for debug */ #ifdef EI_DEBUG @@ -152,17 +157,16 @@ static int ei_start_xmit(struct sk_buff *skb, struct device *dev) printk("%s: xmit on stopped card\n", dev->name); return 1; } - printk(KERN_DEBUG "%s: transmit timed out, TX status %#2x, ISR %#2x.\n", - dev->name, txsr, isr); - /* Does the 8390 thinks it has posted an interrupt? */ - if (isr) - printk(KERN_DEBUG "%s: Possible IRQ conflict on IRQ%d?\n", dev->name, dev->irq); - else { - /* The 8390 probably hasn't gotten on the cable yet. */ - printk(KERN_DEBUG "%s: Possible network cable problem?\n", dev->name); - if(ei_local->stat.tx_packets==0) - ei_local->interface_num ^= 1; /* Try a different xcvr. */ + + printk(KERN_DEBUG "%s: Tx timed out, %s TSR=%#2x, ISR=%#2x, t=%d.\n", + dev->name, (txsr & ENTSR_ABT) ? "excess collisions." : + (isr) ? "lost interrupt?" : "cable problem?", txsr, isr, tickssofar); + + if (!isr && !ei_local->stat.tx_packets) { + /* The 8390 probably hasn't gotten on the cable yet. 
*/ + ei_local->interface_num ^= 1; /* Try a different xcvr. */ } + /* Try to restart the card. Perhaps the user has fixed something. */ ei_reset_8390(dev); NS8390_init(dev, 1); @@ -312,7 +316,7 @@ void ei_interrupt(int irq, struct pt_regs * regs) /* Ignore any RDC interrupts that make it back to here. */ if (interrupts & ENISR_RDC) { - outb_p(ENISR_RDC, e8390_base + EN0_ISR); + outb_p(ENISR_RDC, e8390_base + EN0_ISR); } outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base + E8390_CMD); @@ -431,10 +435,9 @@ static void ei_receive(struct device *dev) break; /* Done for now */ current_offset = this_frame << 8; - ei_block_input(dev, sizeof(rx_frame), (char *)&rx_frame, - current_offset); + ei_get_8390_hdr(dev, &rx_frame, this_frame); - pkt_len = rx_frame.count - sizeof(rx_frame); + pkt_len = rx_frame.count - sizeof(struct e8390_pkt_hdr); next_frame = this_frame + 1 + ((pkt_len+4)>>8); @@ -470,9 +473,8 @@ static void ei_receive(struct device *dev) } else { skb_reserve(skb,2); /* IP headers on 16 byte boundaries */ skb->dev = dev; - - ei_block_input(dev, pkt_len, skb_put(skb,pkt_len), - current_offset + sizeof(rx_frame)); + skb_put(skb, pkt_len); /* Make room */ + ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame)); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); ei_local->stat.rx_packets++; @@ -533,7 +535,7 @@ static void ei_rx_overrun(struct device *dev) easy way of timing something in that range, so we use 'jiffies' as a sanity check. 
*/ while ((inb_p(e8390_base+EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 1) { + if (jiffies - reset_start_time > 2*HZ/100) { printk("%s: reset did not complete at ei_rx_overrun.\n", dev->name); NS8390_init(dev, 1); @@ -576,7 +578,7 @@ static void set_multicast_list(struct device *dev, int num_addrs, void *addrs) { short ioaddr = dev->base_addr; - if (num_addrs > 0) { + if (num_addrs > 0 || num_addrs == -2) { /* The multicast-accept list is initialized to accept-all, and we rely on higher-level filtering for now. */ outb_p(E8390_RXCONFIG | 0x08, ioaddr + EN0_RXCR); diff --git a/drivers/net/8390.h b/drivers/net/8390.h index 49747884eb81..fce3b00d70d8 100644 --- a/drivers/net/8390.h +++ b/drivers/net/8390.h @@ -9,6 +9,7 @@ #include #include +#include #define TX_2X_PAGES 12 #define TX_1X_PAGES 6 @@ -16,6 +17,13 @@ #define ETHER_ADDR_LEN 6 +/* The 8390 specific per-packet-header format. */ +struct e8390_pkt_hdr { + unsigned char status; /* status */ + unsigned char next; /* pointer to next packet. */ + unsigned short count; /* header + packet length in bytes */ +}; + /* From 8390.c */ extern int ei_debug; extern struct sigaction ei_sigaction; @@ -39,8 +47,9 @@ extern int autoirq_report(int waittime); struct ei_device { const char *name; void (*reset_8390)(struct device *); + void (*get_8390_hdr)(struct device *, struct e8390_pkt_hdr *, int); void (*block_output)(struct device *, int, const unsigned char *, int); - int (*block_input)(struct device *, int, char *, int); + void (*block_input)(struct device *, int, struct sk_buff *, int); unsigned open:1; unsigned word16:1; /* We have the 16-bit (vs 8-bit) version of the card. */ unsigned txing:1; /* Transmit Active */ @@ -63,8 +72,8 @@ struct ei_device { /* The maximum number of 8390 interrupt service routines called per IRQ. */ #define MAX_SERVICE 12 -/* The maximum number of jiffies waited before assuming a Tx failed. 
*/ -#define TX_TIMEOUT 20 +/* The maximum time waited (in jiffies) before assuming a Tx failed. (20ms) */ +#define TX_TIMEOUT (20*HZ/100) #define ei_status (*(struct ei_device *)(dev->priv)) @@ -155,10 +164,4 @@ struct ei_device { #define ENTSR_CDH 0x40 /* The collision detect "heartbeat" signal was lost. */ #define ENTSR_OWC 0x80 /* There was an out-of-window collision. */ -/* The per-packet-header format. */ -struct e8390_pkt_hdr { - unsigned char status; /* status */ - unsigned char next; /* pointer to next packet. */ - unsigned short count; /* header + packet length in bytes */ -}; #endif /* _8390_h */ diff --git a/drivers/net/CONFIG b/drivers/net/CONFIG index 57d9c05d55c3..7da2806874a1 100644 --- a/drivers/net/CONFIG +++ b/drivers/net/CONFIG @@ -9,15 +9,14 @@ # # CONFIG_WD80x3 The Western Digital (SMC) WD80x3 driver # WD_SHMEM=xxx Forces the address of the shared memory -# WD_no_mapout Don't map out the shared memory (faster, but -# your machine may not warm-boot). # CONFIG_NE2000 The NE-[12]000 clone driver. # PACKETBUF_MEMSIZE Allows an extra-large packet buffer to be # used. Usually pointless under Linux. # show_all_SAPROM Show the entire address PROM, not just the # ethernet address, during boot. # CONFIG_NE_RW_BUGFIX Patch an obscure bug with a version of the 8390. -# CONFIG_NE_SANITY ?? +# CONFIG_NE_SANITY Double check the internal card xfer address +# against the driver's value. Useful for debugging. # CONFIG_HPLAN The HP-LAN driver (for 8390-based boards only). # rw_bugfix Fix the same obscure bug. # CONFIG_EL2 The 3c503 EtherLink II driver diff --git a/drivers/net/ac3200.c b/drivers/net/ac3200.c index c4b7208d0707..26075af15841 100644 --- a/drivers/net/ac3200.c +++ b/drivers/net/ac3200.c @@ -29,6 +29,7 @@ static const char *version = #include #include +#include #include "8390.h" /* Offsets from the base address. 
*/ @@ -72,10 +73,13 @@ static int ac_probe1(int ioaddr, struct device *dev); static int ac_open(struct device *dev); static void ac_reset_8390(struct device *dev); -static int ac_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void ac_block_input(struct device *dev, int count, + struct sk_buff *skb, int ring_offset); static void ac_block_output(struct device *dev, const int count, const unsigned char *buf, const int start_page); +static void ac_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); + static int ac_close_card(struct device *dev); @@ -195,6 +199,7 @@ static int ac_probe1(int ioaddr, struct device *dev) ei_status.reset_8390 = &ac_reset_8390; ei_status.block_input = &ac_block_input; ei_status.block_output = &ac_block_output; + ei_status.get_8390_hdr = &ac_get_8390_hdr; dev->open = &ac_open; dev->stop = &ac_close_card; @@ -237,33 +242,43 @@ static void ac_reset_8390(struct device *dev) return; } +/* Grab the 8390 specific header. Similar to the block_input routine, but + we don't need to be concerned with ring wrap as the header will be at + the start of a page, so we optimize accordingly. */ + +static void +ac_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + unsigned long hdr_start = dev->mem_start + ((ring_page - AC_START_PG)<<8); + memcpy_fromio(hdr, hdr_start, sizeof(struct e8390_pkt_hdr)); +} + /* Block input and output are easy on shared memory ethercards, the only complication is when the ring buffer wraps. */ -static int ac_block_input(struct device *dev, int count, char *buf, +static void ac_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { - long xfer_start = dev->mem_start + ring_offset - (AC_START_PG<<8); + unsigned long xfer_start = dev->mem_start + ring_offset - (AC_START_PG<<8); if (xfer_start + count > dev->rmem_end) { /* We must wrap the input move. 
*/ int semi_count = dev->rmem_end - xfer_start; - memcpy(buf, (char*)xfer_start, semi_count); + memcpy_fromio(skb->data, xfer_start, semi_count); count -= semi_count; - memcpy(buf + semi_count, (char *)dev->rmem_start, count); - return dev->rmem_start + count; + memcpy_fromio(skb->data + semi_count, dev->rmem_start, count); + } else { + /* Packet is in one chunk -- we can copy + cksum. */ + eth_io_copy_and_sum(skb, xfer_start, count, 0); } - memcpy(buf, (char*)xfer_start, count); - - return ring_offset + count; } static void ac_block_output(struct device *dev, int count, const unsigned char *buf, int start_page) { - long shmem = dev->mem_start + ((start_page - AC_START_PG)<<8); + unsigned long shmem = dev->mem_start + ((start_page - AC_START_PG)<<8); - memcpy((unsigned char *)shmem, buf, count); + memcpy_toio(shmem, buf, count); } static int ac_close_card(struct device *dev) diff --git a/drivers/net/e2100.c b/drivers/net/e2100.c index 9904985b1e5c..011917ab0a01 100644 --- a/drivers/net/e2100.c +++ b/drivers/net/e2100.c @@ -19,7 +19,7 @@ station address region, and the low three bits of next outb() *address* is used as the write value for that register. Either someone wasn't too used to dem bit en bites, or they were trying to obfuscate the - programming interface. + programming interface. There is an additional complication when setting the window on the packet buffer. 
You must first do a read into the packet buffer region with the @@ -48,6 +48,7 @@ static const char *version = #include #include +#include #include "8390.h" static int e21_probe_list[] = {0x300, 0x280, 0x380, 0x220, 0}; @@ -99,10 +100,13 @@ int e21_probe1(struct device *dev, int ioaddr); static int e21_open(struct device *dev); static void e21_reset_8390(struct device *dev); -static int e21_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void e21_block_input(struct device *dev, int count, + struct sk_buff *skb, int ring_offset); static void e21_block_output(struct device *dev, int count, const unsigned char *buf, const start_page); +static void e21_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); + static int e21_close(struct device *dev); @@ -189,7 +193,7 @@ int e21_probe1(struct device *dev, int ioaddr) ei_status.tx_start_page = E21_TX_START_PG; ei_status.rx_start_page = E21_RX_START_PG; ei_status.stop_page = E21_RX_STOP_PG; - ei_status.saved_irq = dev->irq; + ei_status.saved_irq = dev->irq; /* Check the media port used. The port can be passed in on the low mem_end bits. */ @@ -227,6 +231,7 @@ int e21_probe1(struct device *dev, int ioaddr) ei_status.reset_8390 = &e21_reset_8390; ei_status.block_input = &e21_block_input; ei_status.block_output = &e21_block_output; + ei_status.get_8390_hdr = &e21_get_8390_hdr; dev->open = &e21_open; dev->stop = &e21_close; NS8390_init(dev, 0); @@ -277,28 +282,40 @@ e21_reset_8390(struct device *dev) return; } +/* Grab the 8390 specific header. We put the 2k window so the header page + appears at the start of the shared memory. 
*/ + +static void +e21_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + + short ioaddr = dev->base_addr; + char *shared_mem = (char *)dev->mem_start; + + mem_on(ioaddr, shared_mem, ring_page); + + memcpy_fromio(hdr, shared_mem, sizeof(struct e8390_pkt_hdr)); + + /* Turn off memory access: we would need to reprogram the window anyway. */ + mem_off(ioaddr); + +} + /* Block input and output are easy on shared memory ethercards. The E21xx makes block_input() especially easy by wrapping the top ring buffer to the bottom automatically. */ -static int -e21_block_input(struct device *dev, int count, char *buf, int ring_offset) +static void +e21_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { short ioaddr = dev->base_addr; char *shared_mem = (char *)dev->mem_start; - int start_page = (ring_offset>>8); - mem_on(ioaddr, shared_mem, start_page); + mem_on(ioaddr, shared_mem, (ring_offset>>8)); - /* We'll always get a 4 byte header read first. */ - if (count == 4) - ((int*)buf)[0] = ((int*)shared_mem)[0]; - else - memcpy(buf, shared_mem + (ring_offset & 0xff), count); + /* Packet is always in one chunk -- we can copy + cksum. */ + eth_io_copy_and_sum(skb, dev->mem_start + (ring_offset & 0xff), count, 0); - /* Turn off memory access: we would need to reprogram the window anyway. */ mem_off(ioaddr); - - return 0; } static void @@ -310,10 +327,10 @@ e21_block_output(struct device *dev, int count, const unsigned char *buf, /* Set the shared memory window start by doing a read, with the low address bits specifying the starting page. 
*/ - *(shared_mem + start_page); + readb(shared_mem + start_page); mem_on(ioaddr, shared_mem, start_page); - memcpy((char*)shared_mem, buf, count); + memcpy_toio(shared_mem, buf, count); mem_off(ioaddr); } diff --git a/drivers/net/eql.c b/drivers/net/eql.c index de613e3f3499..2f5eb57d6002 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -25,7 +25,7 @@ static const char *version = * Inspirations: * The Harried and Overworked Alan Cox * Conspiracies: - * The Alan Cox and Arisian plot to get someone else to do the code, which + * The Alan Cox and Mike McLagan plot to get someone else to do the code, which * turned out to be me. */ diff --git a/drivers/net/hp-plus.c b/drivers/net/hp-plus.c index 1a1fdfe05c51..c2dce009646d 100644 --- a/drivers/net/hp-plus.c +++ b/drivers/net/hp-plus.c @@ -101,14 +101,18 @@ int hpp_probe1(struct device *dev, int ioaddr); static void hpp_reset_8390(struct device *dev); static int hpp_open(struct device *dev); static int hpp_close(struct device *dev); -static int hpp_mem_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void hpp_mem_block_input(struct device *dev, int count, + struct sk_buff *skb, int ring_offset); static void hpp_mem_block_output(struct device *dev, int count, const unsigned char *buf, const start_page); -static int hpp_io_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void hpp_mem_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); +static void hpp_io_block_input(struct device *dev, int count, + struct sk_buff *skb, int ring_offset); static void hpp_io_block_output(struct device *dev, int count, const unsigned char *buf, const start_page); +static void hpp_io_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); /* Probe a list of addresses for an HP LAN+ adaptor. 
@@ -221,11 +225,13 @@ int hpp_probe1(struct device *dev, int ioaddr) ei_status.reset_8390 = &hpp_reset_8390; ei_status.block_input = &hpp_io_block_input; ei_status.block_output = &hpp_io_block_output; + ei_status.get_8390_hdr = &hpp_io_get_8390_hdr; /* Check if the memory_enable flag is set in the option register. */ if (mem_start) { ei_status.block_input = &hpp_mem_block_input; ei_status.block_output = &hpp_mem_block_output; + ei_status.get_8390_hdr = &hpp_mem_get_8390_hdr; dev->mem_start = mem_start; dev->rmem_start = dev->mem_start + TX_2X_PAGES*256; dev->mem_end = dev->rmem_end @@ -307,23 +313,49 @@ hpp_reset_8390(struct device *dev) return; } -/* Block input and output, similar to the Crynwr packet driver. +/* The programmed-I/O version of reading the 4 byte 8390 specific header. Note that transfer with the EtherTwist+ must be on word boundaries. */ -static int -hpp_io_block_input(struct device *dev, int count, char *buf, int ring_offset) +static void +hpp_io_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + int ioaddr = dev->base_addr - NIC_OFFSET; + + outw((ring_page<<8), ioaddr + HPP_IN_ADDR); + insw(ioaddr + HP_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)>>1); +} + +/* Block input and output, similar to the Crynwr packet driver. */ + +static void +hpp_io_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { int ioaddr = dev->base_addr - NIC_OFFSET; + char *buf = skb->data; outw(ring_offset, ioaddr + HPP_IN_ADDR); insw(ioaddr + HP_DATAPORT, buf, count>>1); if (count & 0x01) buf[count-1] = inw(ioaddr + HP_DATAPORT); - return ring_offset + count; } -static int -hpp_mem_block_input(struct device *dev, int count, char *buf, int ring_offset) +/* The corresponding shared memory versions of the above 2 functions. 
*/ + +static void +hpp_mem_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + int ioaddr = dev->base_addr - NIC_OFFSET; + int option_reg = inw(ioaddr + HPP_OPTION); + + outw((ring_page<<8), ioaddr + HPP_IN_ADDR); + outw(option_reg & ~(MemDisable + BootROMEnb), ioaddr + HPP_OPTION); + memcpy_fromio(hdr, dev->mem_start, sizeof(struct e8390_pkt_hdr)); + outw(option_reg, ioaddr + HPP_OPTION); + hdr->count = (hdr->count + 3) & ~3; /* Round up allocation. */ +} + +static void +hpp_mem_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { int ioaddr = dev->base_addr - NIC_OFFSET; int option_reg = inw(ioaddr + HPP_OPTION); @@ -331,11 +363,13 @@ hpp_mem_block_input(struct device *dev, int count, char *buf, int ring_offset) outw(ring_offset, ioaddr + HPP_IN_ADDR); outw(option_reg & ~(MemDisable + BootROMEnb), ioaddr + HPP_OPTION); - /* Caution: this relies on 8390.c rounding up allocations! */ - memcpy(buf, (char*)dev->mem_start, (count + 3) & ~3); - outw(option_reg, ioaddr + HPP_OPTION); - return ring_offset + count; + /* Caution: this relies on get_8390_hdr() rounding up count! + Also note that we *can't* use eth_io_copy_and_sum() because + it will not always copy "count" bytes (e.g. padded IP). */ + + memcpy_fromio(skb->data, dev->mem_start, count); + outw(option_reg, ioaddr + HPP_OPTION); } /* A special note: we *must* always transfer >=16 bit words. 
@@ -359,7 +393,7 @@ hpp_mem_block_output(struct device *dev, int count, outw(start_page << 8, ioaddr + HPP_OUT_ADDR); outw(option_reg & ~(MemDisable + BootROMEnb), ioaddr + HPP_OPTION); - memcpy((char *)dev->mem_start, buf, (count + 3) & ~3); + memcpy_toio(dev->mem_start, buf, (count + 3) & ~3); outw(option_reg, ioaddr + HPP_OPTION); return; diff --git a/drivers/net/hp.c b/drivers/net/hp.c index 16fdca8e2708..a41cd9aaab53 100644 --- a/drivers/net/hp.c +++ b/drivers/net/hp.c @@ -60,10 +60,13 @@ int hp_probe(struct device *dev); int hp_probe1(struct device *dev, int ioaddr); static void hp_reset_8390(struct device *dev); -static int hp_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void hp_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); +static void hp_block_input(struct device *dev, int count, + struct sk_buff *skb , int ring_offset); static void hp_block_output(struct device *dev, int count, const unsigned char *buf, const start_page); + static void hp_init_card(struct device *dev); /* The map from IRQ number to HP_CONFIGURE register setting. */ @@ -184,6 +187,7 @@ int hp_probe1(struct device *dev, int ioaddr) ei_status.stop_page = wordmode ? HP_16BSTOP_PG : HP_8BSTOP_PG; ei_status.reset_8390 = &hp_reset_8390; + ei_status.get_8390_hdr = &hp_get_8390_hdr; ei_status.block_input = &hp_block_input; ei_status.block_output = &hp_block_output; hp_init_card(dev); @@ -214,17 +218,40 @@ hp_reset_8390(struct device *dev) return; } -/* Block input and output, similar to the Crynwr packet driver. 
If you +static void +hp_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + int nic_base = dev->base_addr; + int saved_config = inb_p(nic_base - NIC_OFFSET + HP_CONFIGURE); + + outb_p(saved_config | HP_DATAON, nic_base - NIC_OFFSET + HP_CONFIGURE); + outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base); + outb_p(sizeof(struct e8390_pkt_hdr), nic_base + EN0_RCNTLO); + outb_p(0, nic_base + EN0_RCNTHI); + outb_p(0, nic_base + EN0_RSARLO); /* On page boundary */ + outb_p(ring_page, nic_base + EN0_RSARHI); + outb_p(E8390_RREAD+E8390_START, nic_base); + + if (ei_status.word16) + insw(nic_base - NIC_OFFSET + HP_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)>>1); + else + insb(nic_base - NIC_OFFSET + HP_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)); + + outb_p(saved_config & (~HP_DATAON), nic_base - NIC_OFFSET + HP_CONFIGURE); +} + +/* Block input and output, similar to the Crynwr packet driver. If you are porting to a new ethercard look at the packet driver source for hints. The HP LAN doesn't use shared memory -- we put the packet out through the "remote DMA" dataport. 
*/ -static int -hp_block_input(struct device *dev, int count, char *buf, int ring_offset) +static void +hp_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { int nic_base = dev->base_addr; int saved_config = inb_p(nic_base - NIC_OFFSET + HP_CONFIGURE); int xfer_count = count; + char *buf = skb->data; outb_p(saved_config | HP_DATAON, nic_base - NIC_OFFSET + HP_CONFIGURE); outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base); @@ -251,7 +278,6 @@ hp_block_input(struct device *dev, int count, char *buf, int ring_offset) dev->name, ring_offset + xfer_count, addr); } outb_p(saved_config & (~HP_DATAON), nic_base - NIC_OFFSET + HP_CONFIGURE); - return ring_offset + count; } static void diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 6f413f376e9f..347541b87bdd 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -14,6 +14,7 @@ * Alan Cox : Fixed oddments for NET3.014 * Alan Cox : Rejig for NET3.029 snap #3 * Alan Cox : Fixed NET3.029 bugs and sped up + * Larry McVoy : Tiny tweak to double performance * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/drivers/net/ne.c b/drivers/net/ne.c index c0f6a8c9bce5..463f08e71fe1 100644 --- a/drivers/net/ne.c +++ b/drivers/net/ne.c @@ -73,6 +73,7 @@ bad_clone_list[] = { {"NE1000","NE2000-invalid", {0x00, 0x00, 0xd8}}, /* Ancient real NE1000. */ {"NN1000", "NN2000", {0x08, 0x03, 0x08}}, /* Outlaw no-name clone. */ {"4-DIM8","4-DIM16", {0x00,0x00,0x4d,}}, /* Outlaw 4-Dimension cards. 
*/ + {"Con-Intl_8", "Con-Intl_16", {0x00, 0x00, 0x24}}, /* Connect Int'nl */ {0,} }; #endif @@ -92,8 +93,10 @@ int ne_probe(struct device *dev); static int ne_probe1(struct device *dev, int ioaddr); static void ne_reset_8390(struct device *dev); -static int ne_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void ne_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); +static void ne_block_input(struct device *dev, int count, + struct sk_buff *skb, int ring_offset); static void ne_block_output(struct device *dev, const int count, const unsigned char *buf, const int start_page); @@ -181,14 +184,12 @@ static int ne_probe1(struct device *dev, int ioaddr) /* DON'T change these to inb_p/outb_p or reset will fail on clones. */ outb(inb(ioaddr + NE_RESET), ioaddr + NE_RESET); - /* wait 20 ms for the dust to settle. */ - while (jiffies - reset_start_time < 2*HZ/100) - barrier(); + while ((inb_p(ioaddr + EN0_ISR) & ENISR_RESET) == 0) + if (jiffies - reset_start_time > 2*HZ/100) { + printk(" not found (no reset ack).\n"); + return ENODEV; + } - if ((inb_p(ioaddr+EN0_ISR) & ENISR_RESET) == 0) { - printk(" not found (no reset ack).\n"); - return ENODEV; - } outb_p(0xff, ioaddr + EN0_ISR); /* Ack all intr. */ } @@ -338,6 +339,7 @@ static int ne_probe1(struct device *dev, int ioaddr) ei_status.reset_8390 = &ne_reset_8390; ei_status.block_input = &ne_block_input; ei_status.block_output = &ne_block_output; + ei_status.get_8390_hdr = &ne_get_8390_hdr; NS8390_init(dev, 0); return 0; } @@ -366,27 +368,63 @@ ne_reset_8390(struct device *dev) outb_p(ENISR_RESET, NE_BASE + EN0_ISR); /* Ack intr. */ } +/* Grab the 8390 specific header. Similar to the block_input routine, but + we don't need to be concerned with ring wrap as the header will be at + the start of a page, so we optimize accordingly. 
*/ + +static void +ne_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + + int nic_base = dev->base_addr; + + /* This *shouldn't* happen. If it does, it's the last thing you'll see */ + if (ei_status.dmaing) { + printk("%s: DMAing conflict in ne_get_8390_hdr " + "[DMAstat:%d][irqlock:%d][intr:%d].\n", + dev->name, ei_status.dmaing, ei_status.irqlock, + dev->interrupt); + return; + } + + ei_status.dmaing |= 0x01; + outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD); + outb_p(sizeof(struct e8390_pkt_hdr), nic_base + EN0_RCNTLO); + outb_p(0, nic_base + EN0_RCNTHI); + outb_p(0, nic_base + EN0_RSARLO); /* On page boundary */ + outb_p(ring_page, nic_base + EN0_RSARHI); + outb_p(E8390_RREAD+E8390_START, nic_base + NE_CMD); + + if (ei_status.word16) + insw(NE_BASE + NE_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)>>1); + else + insb(NE_BASE + NE_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)); + + outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */ + ei_status.dmaing &= ~0x01; +} + /* Block input and output, similar to the Crynwr packet driver. If you are porting to a new ethercard, look at the packet driver source for hints. - The NEx000 doesn't share it on-board packet memory -- you have to put + The NEx000 doesn't share the on-board packet memory -- you have to put the packet out through the "remote DMA" dataport using outb. */ -static int -ne_block_input(struct device *dev, int count, char *buf, int ring_offset) +static void +ne_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { - int nic_base = dev->base_addr; #ifdef CONFIG_NE_SANITY int xfer_count = count; #endif + int nic_base = dev->base_addr; + char *buf = skb->data; /* This *shouldn't* happen. 
If it does, it's the last thing you'll see */ if (ei_status.dmaing) { - if (ei_debug > 0) - printk("%s: DMAing conflict in ne_block_input " - "[DMAstat:%d][irqlock:%d][intr:%d].\n", - dev->name, ei_status.dmaing, ei_status.irqlock, - dev->interrupt); - return 0; + printk("%s: DMAing conflict in ne_block_input " + "[DMAstat:%d][irqlock:%d][intr:%d].\n", + dev->name, ei_status.dmaing, ei_status.irqlock, + dev->interrupt); + return; } ei_status.dmaing |= 0x01; outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD); @@ -431,7 +469,6 @@ ne_block_input(struct device *dev, int count, char *buf, int ring_offset) #endif outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */ ei_status.dmaing &= ~0x01; - return ring_offset + count; } static void @@ -452,11 +489,10 @@ ne_block_output(struct device *dev, int count, /* This *shouldn't* happen. If it does, it's the last thing you'll see */ if (ei_status.dmaing) { - if (ei_debug > 0) - printk("%s: DMAing conflict in ne_block_output." - "[DMAstat:%d][irqlock:%d][intr:%d]\n", - dev->name, ei_status.dmaing, ei_status.irqlock, - dev->interrupt); + printk("%s: DMAing conflict in ne_block_output." + "[DMAstat:%d][irqlock:%d][intr:%d]\n", + dev->name, ei_status.dmaing, ei_status.irqlock, + dev->interrupt); return; } ei_status.dmaing |= 0x01; diff --git a/drivers/net/plip.c b/drivers/net/plip.c index 7ad84018c5cb..0e7f6274a5b3 100644 --- a/drivers/net/plip.c +++ b/drivers/net/plip.c @@ -1,4 +1,4 @@ -/* $Id: plip.c,v 1.14 1995/09/18 04:57:24 gniibe Exp $ */ +/* $Id: plip.c,v 1.15 1995/10/03 01:47:09 gniibe Exp $ */ /* PLIP: A parallel port "network" driver for Linux. */ /* This driver is for parallel port with 5-bit cable (LapLink (R) cable). */ /* @@ -24,6 +24,8 @@ * Niibe Yutaka * - Module initialization. You can specify I/O addr and IRQ: * # insmod plip.o io=0x3bc irq=7 + * - MTU fix. + * - Make sure other end is OK, before sending a packet. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -45,6 +47,10 @@ * So, this PLIP can't communicate the PLIP of Linux v1.0. */ +/* + * To use with DOS box, please do (Turn on ARP switch): + * # ifconfig plip[0-2] arp + */ static const char *version = "NET3 PLIP version 2.1 gniibe@mri.co.jp\n"; /* @@ -534,7 +540,8 @@ plip_receive_packet(struct device *dev, struct net_local *nl, if (plip_receive(nibble_timeout, status_addr, &rcv->nibble, &rcv->length.b.msb)) return TIMEOUT; - if (rcv->length.h > dev->mtu || rcv->length.h < 8) { + if (rcv->length.h > dev->mtu + dev->hard_header_len + || rcv->length.h < 8) { printk("%s: bogus packet size %d.\n", dev->name, rcv->length.h); return ERROR; } @@ -672,6 +679,9 @@ plip_send_packet(struct device *dev, struct net_local *nl, switch (snd->state) { case PLIP_PK_TRIGGER: + if ((inb(PAR_STATUS(dev)) & 0xf8) != 0x80) + return TIMEOUT; + /* Trigger remote rx interrupt. */ outb(0x08, data_addr); cx = nl->trigger; @@ -894,7 +904,7 @@ plip_tx_packet(struct sk_buff *skb, struct device *dev) return 1; } - if (skb->len > dev->mtu) { + if (skb->len > dev->mtu + dev->hard_header_len) { printk("%s: packet too big, %d.\n", dev->name, (int)skb->len); dev->tbusy = 0; return 0; diff --git a/drivers/net/ppp.c b/drivers/net/ppp.c index b096ef5ecd27..ee910d74471a 100644 --- a/drivers/net/ppp.c +++ b/drivers/net/ppp.c @@ -746,7 +746,7 @@ static void ppp_write_wakeup(struct tty_struct *tty) ppp->stats.sbytes += actual; if (actual == count) { ppp->xtail = 0; - tty->flags &= ~TTY_DO_WRITE_WAKEUP; + tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); ppp_output_done(ppp); } else { diff --git a/drivers/net/smc-ultra.c b/drivers/net/smc-ultra.c index ce2cb40c65a9..a78014352e22 100644 --- a/drivers/net/smc-ultra.c +++ b/drivers/net/smc-ultra.c @@ -66,8 +66,10 @@ int ultra_probe1(struct device *dev, int ioaddr); static int ultra_open(struct device *dev); static void 
ultra_reset_8390(struct device *dev); -static int ultra_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void ultra_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); +static void ultra_block_input(struct device *dev, int count, + struct sk_buff *skb, int ring_offset); static void ultra_block_output(struct device *dev, int count, const unsigned char *buf, const start_page); static int ultra_close_card(struct device *dev); @@ -211,6 +213,7 @@ int ultra_probe1(struct device *dev, int ioaddr) ei_status.reset_8390 = &ultra_reset_8390; ei_status.block_input = &ultra_block_input; ei_status.block_output = &ultra_block_output; + ei_status.get_8390_hdr = &ultra_get_8390_hdr; dev->open = &ultra_open; dev->stop = &ultra_close_card; NS8390_init(dev, 0); @@ -253,44 +256,56 @@ ultra_reset_8390(struct device *dev) return; } +/* Grab the 8390 specific header. Similar to the block_input routine, but + we don't need to be concerned with ring wrap as the header will be at + the start of a page, so we optimize accordingly. */ + +static void +ultra_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + + unsigned long hdr_start = dev->mem_start + ((ring_page - START_PG)<<8); + + outb(ULTRA_MEMENB, dev->base_addr - ULTRA_NIC_OFFSET); /* shmem on */ + memcpy_fromio(hdr, hdr_start, sizeof(struct e8390_pkt_hdr)); + outb(0x00, dev->base_addr - ULTRA_NIC_OFFSET); /* shmem off */ +} + /* Block input and output are easy on shared memory ethercards, the only complication is when the ring buffer wraps. */ -static int -ultra_block_input(struct device *dev, int count, char *buf, int ring_offset) +static void +ultra_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { - void *xfer_start = (void *)(dev->mem_start + ring_offset - - (START_PG<<8)); + unsigned long xfer_start = dev->mem_start + ring_offset - (START_PG<<8); /* Enable shared memory. 
*/ outb(ULTRA_MEMENB, dev->base_addr - ULTRA_NIC_OFFSET); - if (xfer_start + count > (void*) dev->rmem_end) { + if (xfer_start + count > dev->rmem_end) { /* We must wrap the input move. */ - int semi_count = (void*)dev->rmem_end - xfer_start; - memcpy(buf, xfer_start, semi_count); + int semi_count = dev->rmem_end - xfer_start; + memcpy_fromio(skb->data, xfer_start, semi_count); count -= semi_count; - memcpy(buf + semi_count, (char *)dev->rmem_start, count); - outb(0x00, dev->base_addr - ULTRA_NIC_OFFSET); /* Disable memory. */ - return dev->rmem_start + count; + memcpy_fromio(skb->data + semi_count, dev->rmem_start, count); + } else { + /* Packet is in one chunk -- we can copy + cksum. */ + eth_io_copy_and_sum(skb, xfer_start, count, 0); } - memcpy(buf, xfer_start, count); - outb(0x00, dev->base_addr - ULTRA_NIC_OFFSET); /* Disable memory. */ - return ring_offset + count; + outb(0x00, dev->base_addr - ULTRA_NIC_OFFSET); /* Disable memory. */ } static void ultra_block_output(struct device *dev, int count, const unsigned char *buf, int start_page) { - unsigned char *shmem - = (unsigned char *)dev->mem_start + ((start_page - START_PG)<<8); + unsigned long shmem = dev->mem_start + ((start_page - START_PG)<<8); /* Enable shared memory. */ outb(ULTRA_MEMENB, dev->base_addr - ULTRA_NIC_OFFSET); - memcpy(shmem, buf, count); + memcpy_toio(shmem, buf, count); outb(0x00, dev->base_addr - ULTRA_NIC_OFFSET); /* Disable memory. */ } diff --git a/drivers/net/tulip.c b/drivers/net/tulip.c index 9b0f63e2ab5a..ec512f9b7779 100644 --- a/drivers/net/tulip.c +++ b/drivers/net/tulip.c @@ -696,7 +696,7 @@ set_multicast_list(struct device *dev, int num_addrs, void *addrs) short ioaddr = dev->base_addr; int csr6 = inl(ioaddr + CSR6) & ~0x00D5; - if (num_addrs > 15) { + if (num_addrs > 15 || num_addrs == -2) { /* Too many to filter perfectly -- accept all multicasts. */ outl(csr6 | 0x0080, ioaddr + CSR6); } else if (num_addrs < 0) { /* Set promiscuous. 
*/ diff --git a/drivers/net/wd.c b/drivers/net/wd.c index 9be07c8e8d85..b7244df2c8f9 100644 --- a/drivers/net/wd.c +++ b/drivers/net/wd.c @@ -45,8 +45,10 @@ int wd_probe1(struct device *dev, int ioaddr); static int wd_open(struct device *dev); static void wd_reset_8390(struct device *dev); -static int wd_block_input(struct device *dev, int count, - char *buf, int ring_offset); +static void wd_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, + int ring_page); +static void wd_block_input(struct device *dev, int count, + struct sk_buff *skb, int ring_offset); static void wd_block_output(struct device *dev, int count, const unsigned char *buf, const start_page); static int wd_close_card(struct device *dev); @@ -261,6 +263,7 @@ int wd_probe1(struct device *dev, int ioaddr) ei_status.reset_8390 = &wd_reset_8390; ei_status.block_input = &wd_block_input; ei_status.block_output = &wd_block_output; + ei_status.get_8390_hdr = &wd_get_8390_hdr; dev->open = &wd_open; dev->stop = &wd_close_card; NS8390_init(dev, 0); @@ -316,40 +319,50 @@ wd_reset_8390(struct device *dev) return; } +/* Grab the 8390 specific header. Similar to the block_input routine, but + we don't need to be concerned with ring wrap as the header will be at + the start of a page, so we optimize accordingly. */ + +static void +wd_get_8390_hdr(struct device *dev, struct e8390_pkt_hdr *hdr, int ring_page) +{ + + int wd_cmdreg = dev->base_addr - WD_NIC_OFFSET; /* WD_CMDREG */ + unsigned long hdr_start = dev->mem_start + ((ring_page - WD_START_PG)<<8); + + /* We'll always get a 4 byte header read followed by a packet read, so + we enable 16 bit mode before the header, and disable after the body. */ + if (ei_status.word16) + outb(ISA16 | ei_status.reg5, wd_cmdreg+WD_CMDREG5); + + memcpy_fromio(hdr, hdr_start, sizeof(struct e8390_pkt_hdr)); +} + /* Block input and output are easy on shared memory ethercards, and trivial on the Western digital card where there is no choice of how to do it. 
The only complications are that the ring buffer wraps, and need to map switch between 8- and 16-bit modes. */ -static int -wd_block_input(struct device *dev, int count, char *buf, int ring_offset) +static void +wd_block_input(struct device *dev, int count, struct sk_buff *skb, int ring_offset) { int wd_cmdreg = dev->base_addr - WD_NIC_OFFSET; /* WD_CMDREG */ - long xfer_start = dev->mem_start + ring_offset - (WD_START_PG<<8); - - /* We'll always get a 4 byte header read followed by a packet read, so - we enable 16 bit mode before the header, and disable after the body. */ - if (count == 4) { - if (ei_status.word16) - outb(ISA16 | ei_status.reg5, wd_cmdreg+WD_CMDREG5); - ((int*)buf)[0] = ((int*)xfer_start)[0]; - return 0; - } + unsigned long xfer_start = dev->mem_start + ring_offset - (WD_START_PG<<8); if (xfer_start + count > dev->rmem_end) { /* We must wrap the input move. */ int semi_count = dev->rmem_end - xfer_start; - memcpy(buf, (char *)xfer_start, semi_count); + memcpy_fromio(skb->data, xfer_start, semi_count); count -= semi_count; - memcpy(buf + semi_count, (char *)dev->rmem_start, count); - } else - memcpy(buf, (char *)xfer_start, count); + memcpy_fromio(skb->data + semi_count, dev->rmem_start, count); + } else { + /* Packet is in one chunk -- we can copy + cksum. */ + eth_io_copy_and_sum(skb, xfer_start, count, 0); + } /* Turn off 16 bit access so that reboot works. ISA brain-damage */ if (ei_status.word16) outb(ei_status.reg5, wd_cmdreg+WD_CMDREG5); - - return 0; } static void @@ -363,10 +376,10 @@ wd_block_output(struct device *dev, int count, const unsigned char *buf, if (ei_status.word16) { /* Turn on and off 16 bit access so that reboot works. 
*/ outb(ISA16 | ei_status.reg5, wd_cmdreg+WD_CMDREG5); - memcpy((char *)shmem, buf, count); + memcpy_toio(shmem, buf, count); outb(ei_status.reg5, wd_cmdreg+WD_CMDREG5); } else - memcpy((char *)shmem, buf, count); + memcpy_toio(shmem, buf, count); } diff --git a/drivers/scsi/53c7,8xx.c b/drivers/scsi/53c7,8xx.c index e478934faad3..725f6f2a69b0 100644 --- a/drivers/scsi/53c7,8xx.c +++ b/drivers/scsi/53c7,8xx.c @@ -1121,7 +1121,7 @@ NCR53c8x0_init_fixup (struct Scsi_Host *host) { * hostdata structure rather than in the RELATIVE area of the * SCRIPTS. */ - + patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_memory, tmp); patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_ncr, memory_to_ncr); diff --git a/drivers/scsi/eata_dma.c b/drivers/scsi/eata_dma.c index 5bfbd89e65f7..aede48fd6c37 100644 --- a/drivers/scsi/eata_dma.c +++ b/drivers/scsi/eata_dma.c @@ -881,7 +881,7 @@ short register_HBA(u32 base, struct get_conf *gc, Scsi_Host_Template * tpnt, } /* if gc->DMA_valid it must be an ISA HBA and we have to register it */ - dma_channel = 0xff; + dma_channel = BUSMASTER; if (gc->DMA_valid) { if (request_dma(dma_channel = (8 - gc->DMA_channel) & 7, "eata_dma")) { printk("Unable to allocate DMA channel %d for ISA HBA at %#.4x.\n", @@ -1025,8 +1025,9 @@ short register_HBA(u32 base, struct get_conf *gc, Scsi_Host_Template * tpnt, hd->channel = gc->MAX_CHAN; sh->max_channel = gc->MAX_CHAN; + sh->unique_id = base; sh->base = (char *) base; - sh->io_port = (u16) base; + sh->io_port = base; sh->n_io_port = 9; sh->irq = gc->IRQ; sh->dma_channel = dma_channel; @@ -1318,7 +1319,7 @@ int eata_detect(Scsi_Host_Template * tpnt) SD(HBA_ptr)->EATA_revision, (SD(HBA_ptr)->bustype == 'P')? 
"PCI ":(SD(HBA_ptr)->bustype == 'E')?"EISA":"ISA ", (u32) HBA_ptr->base, HBA_ptr->irq); - if(HBA_ptr->dma_channel != 0xff) + if(HBA_ptr->dma_channel != BUSMASTER) printk(" %2x ", HBA_ptr->dma_channel); else printk(" %s", "BMST"); diff --git a/drivers/scsi/eata_dma_proc.c b/drivers/scsi/eata_dma_proc.c index 5c486c5a3175..94a672c94706 100644 --- a/drivers/scsi/eata_dma_proc.c +++ b/drivers/scsi/eata_dma_proc.c @@ -90,6 +90,7 @@ int eata_proc_info(char *buffer, char **start, off_t offset, int length, int size, len = 0; off_t begin = 0; off_t pos = 0; + scd = NULL; HBA_ptr = first_HBA; for (i = 1; i <= registered_HBAs; i++) { @@ -135,8 +136,8 @@ int eata_proc_info(char *buffer, char **start, off_t offset, int length, len += size; pos = begin + len; - if(SD(HBA_ptr)->bustype == IS_EISA) { - if (HBA_ptr->dma_channel == 0xff) + if(SD(HBA_ptr)->broken_INQUIRY == TRUE) { + if (HBA_ptr->dma_channel == BUSMASTER) size = sprintf(buffer + len, "DMA: BUSMASTER\n"); else size = sprintf(buffer + len, "DMA: %d\n", HBA_ptr->dma_channel); @@ -146,6 +147,7 @@ int eata_proc_info(char *buffer, char **start, off_t offset, int length, size = sprintf(buffer + len, "Base IO : %#.4x\n", (u32) HBA_ptr->base); len += size; pos = begin + len; + size = sprintf(buffer + len, "Host Bus: EISA\n"); len += size; pos = begin + len; @@ -456,6 +458,7 @@ int eata_proc_info(char *buffer, char **start, off_t offset, int length, goto stop_output; } +#if 0 scd = scsi_devices; size = sprintf(buffer+len,"Attached devices: %s\n", (scd)?"":"none"); @@ -477,6 +480,7 @@ int eata_proc_info(char *buffer, char **start, off_t offset, int length, } scd = scd->next; } +#endif stop_output: DBG(DBG_PROC, printk("2pos: %ld offset: %ld len: %d\n", pos, offset, len)); diff --git a/drivers/scsi/eata_generic.h b/drivers/scsi/eata_generic.h index b429ae04b54e..a70042eb0051 100644 --- a/drivers/scsi/eata_generic.h +++ b/drivers/scsi/eata_generic.h @@ -40,6 +40,9 @@ #define BROKEN_INQUIRY 1 +#define BUSMASTER 0xff +#define PIO 
0xfe + #define EATA_SIGNATURE 0x45415441 /* BIG ENDIAN coded "EATA" sig. */ #define EATA_CP_SIZE 44 diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c index dbb857c4cdec..8ed9b14abe0c 100644 --- a/drivers/scsi/eata_pio.c +++ b/drivers/scsi/eata_pio.c @@ -783,11 +783,12 @@ int register_pio_HBA(long base, struct get_conf *gc, Scsi_Host_Template * tpnt) SD(sh)->hostid=gc->scsi_id[3]; SD(sh)->devflags=1<scsi_id[3]; SD(sh)->moresupport=gc->MORE_support; + sh->unique_id = base; sh->base = (char *) base; - sh->io_port = (ushort) base; + sh->io_port = base; sh->n_io_port = 8; sh->irq = gc->IRQ; - sh->dma_channel = 0xfe; /* PIO */ + sh->dma_channel = PIO; sh->this_id = gc->scsi_id[3]; sh->can_queue = 1; sh->cmd_per_lun = 1; diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index ff250511a9bb..b84385cabbd6 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -265,8 +265,8 @@ void proc_print_scsidevice(Scsi_Device *scd, char *buffer, int *size, int len) int x, y = *size; y = sprintf(buffer + len, - "Channel: %02d Id: %02d Lun: %02d\n Vendor: ", - scd->channel, scd->id, scd->lun); + "Host: scsi%d Channel: %02d Id: %02d Lun: %02d\n Vendor: ", + scd->host->host_no, scd->channel, scd->id, scd->lun); for (x = 0; x < 8; x++) { if (scd->vendor[x] >= 0x20) y += sprintf(buffer + len + y, "%c", scd->vendor[x]); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 28b7ab2982b9..e58a127cd70e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1325,7 +1325,7 @@ int revalidate_scsidisk(kdev_t dev, int maxusage){ * Reset the blocksize for everything so that we can read * the partition table. 
*/ - blksize_size[MAJOR_NR][i] = 1024; + blksize_size[MAJOR_NR][minor] = 1024; }; #ifdef MAYBE_REINIT diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index dfa7df9bce39..297cae5a9301 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -158,7 +158,31 @@ int sr_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigne result = do_ioctl(target, sr_cmd, NULL, 255); return result; } + + case CDROMPLAYBLK: + { + struct cdrom_blk blk; + + err = verify_area (VERIFY_READ, (void *) arg, sizeof (blk)); + if (err) return err; + + memcpy_fromfs(&blk, (void *) arg, sizeof(blk)); + + sr_cmd[0] = SCMD_PLAYAUDIO10; + sr_cmd[1] = scsi_CDs[target].device->lun << 5; + sr_cmd[2] = blk.from >> 24; + sr_cmd[3] = blk.from >> 16; + sr_cmd[4] = blk.from >> 8; + sr_cmd[5] = blk.from; + sr_cmd[6] = 0; + sr_cmd[7] = blk.len >> 8; + sr_cmd[8] = blk.len; + sr_cmd[9] = 0; + result = do_ioctl(target, sr_cmd, NULL, 255); + return result; + } + case CDROMPLAYTRKIND: { struct cdrom_ti ti; diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index ac397e4fc51b..3821a8093535 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -11,7 +11,7 @@ Copyright 1992, 1993, 1994, 1995 Kai Makisara email Kai.Makisara@metla.fi - Last modified: Mon Sep 25 19:52:16 1995 by root@kai.makisara.fi + Last modified: Sat Sep 30 15:54:57 1995 by root@kai.makisara.fi Some small formal changes - aeb, 950809 */ #ifdef MODULE @@ -684,6 +684,8 @@ scsi_tape_close(struct inode * inode, struct file * filp) if (!SCpnt) return; + SCpnt->request.rq_status = RQ_INACTIVE; /* Mark as not busy */ + if ((STp->buffer)->last_result_fatal != 0) printk("st%d: Error on write filemark.\n", dev); else { @@ -693,7 +695,6 @@ scsi_tape_close(struct inode * inode, struct file * filp) if (STp->two_fm) back_over_eof(STp); } - SCpnt->request.rq_status = RQ_INACTIVE; /* Mark as not busy */ } #if DEBUG @@ -972,7 +973,7 @@ st_write(struct inode * inode, struct file * filp, const char * buf, int count) 
(STp->buffer)->writing, st_sleep_done, ST_TIMEOUT, MAX_WRITE_RETRIES); } - else + else if (SCpnt != NULL) SCpnt->request.rq_status = RQ_INACTIVE; /* Mark as not busy */ STp->at_sm &= (total != 0); @@ -1192,7 +1193,8 @@ st_read(struct inode * inode, struct file * filp, char * buf, int count) } else if (STp->eof != ST_NOEOF) { STp->eof_hit = 1; - SCpnt->request.rq_status = RQ_INACTIVE; /* Mark as not busy */ + if (SCpnt != NULL) + SCpnt->request.rq_status = RQ_INACTIVE; /* Mark as not busy */ if (total == 0 && STp->eof == ST_FM) { STp->eof = ST_NOEOF; STp->drv_block = 0; @@ -1211,7 +1213,8 @@ st_read(struct inode * inode, struct file * filp, char * buf, int count) } /* for (total = 0; total < count; ) */ - SCpnt->request.rq_status = RQ_INACTIVE; /* Mark as not busy */ + if (SCpnt != NULL) + SCpnt->request.rq_status = RQ_INACTIVE; /* Mark as not busy */ return total; } @@ -1734,7 +1737,7 @@ st_int_ioctl(struct inode * inode,struct file * file, st_ioctl(struct inode * inode,struct file * file, unsigned int cmd_in, unsigned long arg) { - int i, cmd, result; + int i, cmd_nr, cmd_type, result; struct mtop mtc; struct mtpos mt_pos; unsigned char scmd[10]; @@ -1759,10 +1762,10 @@ st_ioctl(struct inode * inode,struct file * file, return scsi_ioctl(STp->device, cmd_in, (void *) arg); } - cmd = cmd_in & IOCCMD_MASK; - if (cmd == (MTIOCTOP & IOCCMD_MASK)) { - - if (((cmd_in & IOCSIZE_MASK) >> IOCSIZE_SHIFT) != sizeof(mtc)) + cmd_type = _IOC_TYPE(cmd_in); + cmd_nr = _IOC_NR(cmd_in); + if (cmd_type == _IOC_TYPE(MTIOCTOP) && cmd_nr == _IOC_NR(MTIOCTOP)) { + if (_IOC_SIZE(cmd_in) != sizeof(mtc)) return (-EINVAL); i = verify_area(VERIFY_READ, (void *)arg, sizeof(mtc)); @@ -1816,9 +1819,9 @@ st_ioctl(struct inode * inode,struct file * file, else return st_int_ioctl(inode, file, mtc.mt_op, mtc.mt_count); } - else if (cmd == (MTIOCGET & IOCCMD_MASK)) { + else if (cmd_type == _IOC_TYPE(MTIOCGET) && cmd_nr == _IOC_NR(MTIOCGET)) { - if (((cmd_in & IOCSIZE_MASK) >> IOCSIZE_SHIFT) != 
sizeof(struct mtget)) + if (_IOC_SIZE(cmd_in) != sizeof(struct mtget)) return (-EINVAL); i = verify_area(VERIFY_WRITE, (void *)arg, sizeof(struct mtget)); if (i) @@ -1869,14 +1872,14 @@ st_ioctl(struct inode * inode,struct file * file, (STp->mt_status)->mt_erreg = 0; /* Clear after read */ return 0; } - else if (cmd == (MTIOCPOS & IOCCMD_MASK)) { + else if (cmd_type == _IOC_TYPE(MTIOCPOS) && cmd_nr == _IOC_NR(MTIOCPOS)) { if (STp->ready != ST_READY) return (-EIO); #if DEBUG if (debugging) printk("st%d: get tape position.\n", dev); #endif - if (((cmd_in & IOCSIZE_MASK) >> IOCSIZE_SHIFT) != sizeof(struct mtpos)) + if (_IOC_SIZE(cmd_in) != sizeof(struct mtpos)) return (-EINVAL); i = flush_buffer(inode, file, 0); diff --git a/drivers/sound/sound_switch.c b/drivers/sound/sound_switch.c index aa85c2a2c8ff..2aed8bd59b4c 100644 --- a/drivers/sound/sound_switch.c +++ b/drivers/sound/sound_switch.c @@ -58,9 +58,7 @@ put_status (char *s) { int l; - for (l = 0; l < 256, s[l]; l++); /* - * l=strlen(s); - */ + l = strnlen(s, 256); if (status_len + l >= 4000) return 0; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b491690ef19b..a97159fa60f6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -61,9 +61,9 @@ extern int dump_fpu (elf_fpregset_t *); struct linux_binfmt elf_format = { #ifndef MODULE - NULL, NULL, load_elf_binary, load_elf_library, aout_core_dump + NULL, NULL, load_elf_binary, load_elf_library, elf_core_dump #else - NULL, &mod_use_count_, load_elf_binary, load_elf_library, aout_core_dump + NULL, &mod_use_count_, load_elf_binary, load_elf_library, elf_core_dump #endif }; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e2c68e65571e..ec6c6106aad0 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -647,7 +647,7 @@ int ext2_remount (struct super_block * sb, int * flags, char * data) /* * Allow the "check" option to be passed as a remount option. 
*/ - set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL); + set_opt (new_mount_opt, CHECK_NORMAL); if (!parse_options (data, &tmp, &resuid, &resgid, &new_mount_opt)) return -EINVAL; diff --git a/fs/read_write.c b/fs/read_write.c index 2d3c9cf2b1a3..75a713a7034e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -149,3 +150,92 @@ asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count) } return written; } + +/* + * OSF/1 (and SunOS) readv/writev emulation. + * + * NOTE! This is not really the way it should be done, + * but it should be good enough for TCP connections, + * notably X11 ;-) + */ +asmlinkage int sys_readv(unsigned long fd, const struct iovec * vector, long count) +{ + int retval; + struct file * file; + struct inode * inode; + + if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode)) + return -EBADF; + if (!(file->f_mode & 1)) + return -EBADF; + if (!file->f_op || !file->f_op->read) + return -EINVAL; + if (!count) + return 0; + retval = verify_area(VERIFY_READ, vector, count*sizeof(*vector)); + if (retval) + return retval; + + while (count > 0) { + void * base; + int len, nr; + + base = get_user(&vector->iov_base); + len = get_user(&vector->iov_len); + vector++; + count--; + nr = verify_area(VERIFY_WRITE, base, len); + if (!nr) + nr = file->f_op->read(inode, file, base, len); + if (nr < 0) { + if (retval) + return retval; + return nr; + } + retval += nr; + if (nr != len) + break; + } + return retval; +} + +asmlinkage int sys_writev(unsigned long fd, const struct iovec * vector, long count) +{ + int retval; + struct file * file; + struct inode * inode; + + if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode)) + return -EBADF; + if (!(file->f_mode & 2)) + return -EBADF; + if (!file->f_op || !file->f_op->write) + return -EINVAL; + if (!count) + return 0; + retval = verify_area(VERIFY_READ, vector, count*sizeof(*vector)); + 
if (retval) + return retval; + + while (count > 0) { + void * base; + int len, nr; + + base = get_user(&vector->iov_base); + len = get_user(&vector->iov_len); + vector++; + count--; + nr = verify_area(VERIFY_READ, base, len); + if (!nr) + nr = file->f_op->write(inode, file, base, len); + if (nr < 0) { + if (retval) + return retval; + return nr; + } + retval += nr; + if (nr != len) + break; + } + return retval; +} diff --git a/include/asm-alpha/checksum.h b/include/asm-alpha/checksum.h index bec01a76372f..4fd71c3ac106 100644 --- a/include/asm-alpha/checksum.h +++ b/include/asm-alpha/checksum.h @@ -55,4 +55,15 @@ unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum); extern unsigned short ip_compute_csum(unsigned char * buff, int len); +/* + * Fold a partial checksum without adding pseudo headers + */ + +static inline unsigned short csum_fold(unsigned int sum) +{ + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return ~sum; +} + #endif diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h index 2a62069628af..e5d92bf7d254 100644 --- a/include/asm-alpha/io.h +++ b/include/asm-alpha/io.h @@ -127,6 +127,14 @@ extern void outsl (unsigned long port, void *dst, unsigned long count); #define memcpy_fromio(to,from,len) (memcpy_fromio)((to),(unsigned long)(from),(len)) #define memcpy_toio(to,from,len) (memcpy_toio)((unsigned long)(to),(from),(len)) +/* + * XXX - We don't have csum_partial_copy_fromio() yet, so we cheat here and + * just copy it. The net code will then do the checksum later. Presently + * only used by some shared memory 8390 ethernet cards anyway. 
+ */ + +#define eth_io_copy_and_sum(skb,src,len,unused) memcpy_fromio((skb)->data,(src),(len)) + #endif /* __KERNEL__ */ #endif diff --git a/include/asm-alpha/param.h b/include/asm-alpha/param.h index 03f57ecffad3..3189c3c2501d 100644 --- a/include/asm-alpha/param.h +++ b/include/asm-alpha/param.h @@ -1,8 +1,15 @@ #ifndef _ASMAXP_PARAM_H #define _ASMAXP_PARAM_H +#include + #ifndef HZ -#define HZ 1024 +# if defined(CONFIG_ALPHA_EB66) || defined(CONFIG_ALPHA_EB66P) || \ + defined(CONFIG_ALPHA_EB64) || defined(CONFIG_ALPHA_EB64P) +# define HZ 977 /* Evaluation Boards seem to be a little odd */ +# else +# define HZ 1024 /* normal value for Alpha systems */ +# endif #endif #define EXEC_PAGESIZE 8192 diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h index 544b1a72e192..1100fc742cc2 100644 --- a/include/asm-alpha/unistd.h +++ b/include/asm-alpha/unistd.h @@ -77,6 +77,8 @@ #define __NR_gettimeofday 116 #define __NR_getrusage 117 #define __NR_getsockopt 118 +#define __NR_readv 120 +#define __NR_writev 121 #define __NR_settimeofday 122 #define __NR_fchown 123 #define __NR_fchmod 124 @@ -126,7 +128,7 @@ #define __NR_syslog 310 #define __NR_reboot 311 #define __NR_clone 312 - +#define __NR_uselib 313 #ifdef __LIBRARY__ diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h index 8f9f58ff3aac..3c8bf5abb35b 100644 --- a/include/asm-i386/bitops.h +++ b/include/asm-i386/bitops.h @@ -13,6 +13,14 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ +#include + +#ifdef CONFIG_SMP +#define LOCK_PREFIX "lock ; " +#else +#define LOCK_PREFIX "" +#endif + /* * Some hacks to defeat gcc over-optimizations.. 
*/ @@ -23,7 +31,8 @@ extern __inline__ int set_bit(int nr, void * addr) { int oldbit; - __asm__ __volatile__("btsl %2,%1\n\tsbbl %0,%0" + __asm__ __volatile__(LOCK_PREFIX + "btsl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"ir" (nr)); return oldbit; @@ -33,7 +42,8 @@ extern __inline__ int clear_bit(int nr, void * addr) { int oldbit; - __asm__ __volatile__("btrl %2,%1\n\tsbbl %0,%0" + __asm__ __volatile__(LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"ir" (nr)); return oldbit; @@ -43,7 +53,8 @@ extern __inline__ int change_bit(int nr, void * addr) { int oldbit; - __asm__ __volatile__("btcl %2,%1\n\tsbbl %0,%0" + __asm__ __volatile__(LOCK_PREFIX + "btcl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"ir" (nr)); return oldbit; diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h index bbac2d463028..cecc05c39bf2 100644 --- a/include/asm-i386/checksum.h +++ b/include/asm-i386/checksum.h @@ -103,20 +103,17 @@ static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, * Fold a partial checksum without adding pseudo headers */ -static inline unsigned short int csum_fold(unsigned int sum) +static inline unsigned int csum_fold(unsigned int sum) { - __asm__(" - movl %0, %1 - shrl $16, %1 - addw %w1, %w0 - adcl $0, %0 - notl %0 - " - : "=&r" (sum) - : "0" (sum) - ); - return sum; - } + __asm__(" + addl %1, %0 + adcl $0xffff, %0 + " + : "=r" (sum) + : "r" (sum << 16), "0" (sum & 0xffff0000) + ); + return (~sum) >> 16; +} /* * this routine is used for miscellaneous IP-like checksums, mainly diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h index e408db01b820..a0966aecb584 100644 --- a/include/asm-i386/delay.h +++ b/include/asm-i386/delay.h @@ -6,6 +6,10 @@ * * Delay routines, using a pre-computed "loops_per_second" value. 
*/ + +#ifdef CONFIG_SMP +#include +#endif extern __inline__ void __delay(int loops) { @@ -27,8 +31,13 @@ extern __inline__ void udelay(unsigned long usecs) usecs *= 0x000010c6; /* 2**32 / 1000000 */ __asm__("mull %0" :"=d" (usecs) +#ifdef CONFIG_SMP + :"a" (usecs),"0" (cpu_data[smp_processor_id()].udelay_val) +#else :"a" (usecs),"0" (loops_per_sec) +#endif :"ax"); + __delay(usecs); } diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 8a093ff6ccb6..98e32ce68783 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -77,6 +77,12 @@ extern inline void * phys_to_virt(unsigned long address) #define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c)) #define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c)) +/* + * Again, i386 does not require mem IO specific function. + */ + +#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) + /* * Talk about misusing macros.. */ diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 46893166ed6d..c30d17baf5a0 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -5,6 +5,8 @@ * linux/include/asm/irq.h * * (C) 1992, 1993 Linus Torvalds + * + * IRQ/IPI changes taken from work by Thomas Radke */ #include @@ -124,7 +126,64 @@ extern void enable_irq(unsigned int); #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) #define FAST_IRQ_NAME(nr) IRQ_NAME2(fast_IRQ##nr) #define BAD_IRQ_NAME(nr) IRQ_NAME2(bad_IRQ##nr) + +#ifdef CONFIG_SMP + +#define GET_PROCESSOR_ID \ + "movl "SYMBOL_NAME_STR(apic_reg)", %edx\n\t" \ + "movl 32(%edx), %eax\n\t" \ + "shrl $24,%eax\n\t" \ + "andb $0x0F,%al\n" + +#define ENTER_KERNEL \ + "pushl %eax\n\t" \ + "pushl %edx\n\t" \ + "pushfl\n\t" \ + "cli\n\t" \ + GET_PROCESSOR_ID \ + "1: " \ + "lock\n\t" \ + "btsl $0, "SYMBOL_NAME_STR(kernel_flag)"\n\t" \ + "jnc 3f\n\t" \ + "cmpb "SYMBOL_NAME_STR(active_kernel_processor)", %al\n\t" \ + "je 4f\n\t" \ + "2: " \ + "incl "SYMBOL_NAME_STR(smp_spins)"\n\t" \ + "btl %al, "SYMBOL_NAME_STR(smp_invalidate_needed)"\n\t" \ 
+ "jnc 5f\n\t" \ + "lock\n\t" \ + "btrl %al, "SYMBOL_NAME_STR(smp_invalidate_needed)"\n\t" \ + "jnc 5f\n\t" \ + "movl %cr3,%edx\n\t" \ + "movl %edx,%cr3\n" \ + "5: btl $0, "SYMBOL_NAME_STR(kernel_flag)"\n\t" \ + "jc 2b\n\t" \ + "jmp 1b\n\t" \ + "3: " \ + "movb %al, "SYMBOL_NAME_STR(active_kernel_processor)"\n\t" \ + "4: " \ + "incl "SYMBOL_NAME_STR(kernel_counter)"\n\t" \ + "popfl\n\t" \ + "popl %edx\n\t" \ + "popl %eax\n\t" + +#define LEAVE_KERNEL \ + "pushfl\n\t" \ + "cli\n\t" \ + "decl "SYMBOL_NAME_STR(kernel_counter)"\n\t" \ + "jnz 1f\n\t" \ + "movb $" STR (NO_PROC_ID) ", "SYMBOL_NAME_STR(active_kernel_processor)"\n\t" \ + "lock\n\t" \ + "btrl $0, "SYMBOL_NAME_STR(kernel_flag)"\n\t" \ + "1: " \ + "popfl\n\t" + +/* + * the syscall count inc is a gross hack because ret_from_syscall is used by both irq and + * syscall return paths (urghh). + */ + #define BUILD_IRQ(chip,nr,mask) \ asmlinkage void IRQ_NAME(nr); \ asmlinkage void FAST_IRQ_NAME(nr); \ @@ -134,6 +193,7 @@ __asm__( \ SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ "pushl $-"#nr"-2\n\t" \ SAVE_ALL \ + ENTER_KERNEL \ ACK_##chip(mask) \ "incl "SYMBOL_NAME_STR(intr_count)"\n\t"\ "sti\n\t" \ @@ -145,10 +205,12 @@ SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ "cli\n\t" \ UNBLK_##chip(mask) \ "decl "SYMBOL_NAME_STR(intr_count)"\n\t" \ + "incl "SYMBOL_NAME_STR(syscall_count)"\n\t" \ "jmp ret_from_sys_call\n" \ "\n"__ALIGN_STR"\n" \ SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \ SAVE_MOST \ + ENTER_KERNEL \ ACK_##chip(mask) \ "incl "SYMBOL_NAME_STR(intr_count)"\n\t" \ "pushl $" #nr "\n\t" \ @@ -157,11 +219,120 @@ SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \ "cli\n\t" \ UNBLK_##chip(mask) \ "decl "SYMBOL_NAME_STR(intr_count)"\n\t" \ + LEAVE_KERNEL \ RESTORE_MOST \ "\n"__ALIGN_STR"\n" \ SYMBOL_NAME_STR(bad_IRQ) #nr "_interrupt:\n\t" \ SAVE_MOST \ + ENTER_KERNEL \ ACK_##chip(mask) \ + LEAVE_KERNEL \ RESTORE_MOST); + + +/* + * Message pass must be a fast IRQ.. 
+ */ +#define BUILD_MSGIRQ(chip,nr,mask) \ +asmlinkage void IRQ_NAME(nr); \ +asmlinkage void FAST_IRQ_NAME(nr); \ +asmlinkage void BAD_IRQ_NAME(nr); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ + "pushl $-"#nr"-2\n\t" \ + SAVE_ALL \ + ENTER_KERNEL \ + ACK_##chip(mask) \ + "incl "SYMBOL_NAME_STR(intr_count)"\n\t"\ + "sti\n\t" \ + "movl %esp,%ebx\n\t" \ + "pushl %ebx\n\t" \ + "pushl $" #nr "\n\t" \ + "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \ + "addl $8,%esp\n\t" \ + "cli\n\t" \ + UNBLK_##chip(mask) \ + "decl "SYMBOL_NAME_STR(intr_count)"\n\t" \ + "incl "SYMBOL_NAME_STR(syscall_count)"\n\t" \ + "jmp ret_from_sys_call\n" \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \ + SAVE_MOST \ + ACK_##chip(mask) \ + "incl "SYMBOL_NAME_STR(ipi_count)"\n\t" \ + "pushl $" #nr "\n\t" \ + "call "SYMBOL_NAME_STR(do_fast_IRQ)"\n\t" \ + "addl $4,%esp\n\t" \ + "cli\n\t" \ + UNBLK_##chip(mask) \ + RESTORE_MOST \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(bad_IRQ) #nr "_interrupt:\n\t" \ + SAVE_MOST \ + ACK_##chip(mask) \ + RESTORE_MOST); + +#define BUILD_RESCHEDIRQ(nr) \ +asmlinkage void IRQ_NAME(nr); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ + "pushl $-"#nr"-2\n\t" \ + SAVE_ALL \ + ENTER_KERNEL \ + "incl "SYMBOL_NAME_STR(intr_count)"\n\t"\ + "sti\n\t" \ + "movl %esp,%ebx\n\t" \ + "pushl %ebx\n\t" \ + "pushl $" #nr "\n\t" \ + "call "SYMBOL_NAME_STR(smp_reschedule_irq)"\n\t" \ + "addl $8,%esp\n\t" \ + "cli\n\t" \ + "decl "SYMBOL_NAME_STR(intr_count)"\n\t" \ + "incl "SYMBOL_NAME_STR(syscall_count)"\n\t" \ + "jmp ret_from_sys_call\n"); +#else + +#define BUILD_IRQ(chip,nr,mask) \ +asmlinkage void IRQ_NAME(nr); \ +asmlinkage void FAST_IRQ_NAME(nr); \ +asmlinkage void BAD_IRQ_NAME(nr); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ + "pushl $-"#nr"-2\n\t" \ + SAVE_ALL \ + ACK_##chip(mask) \ + "incl "SYMBOL_NAME_STR(intr_count)"\n\t"\ + "sti\n\t" \ + "movl 
%esp,%ebx\n\t" \ + "pushl %ebx\n\t" \ + "pushl $" #nr "\n\t" \ + "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \ + "addl $8,%esp\n\t" \ + "cli\n\t" \ + UNBLK_##chip(mask) \ + "decl "SYMBOL_NAME_STR(intr_count)"\n\t" \ + "jmp ret_from_sys_call\n" \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \ + SAVE_MOST \ + ACK_##chip(mask) \ + "incl "SYMBOL_NAME_STR(intr_count)"\n\t" \ + "pushl $" #nr "\n\t" \ + "call "SYMBOL_NAME_STR(do_fast_IRQ)"\n\t" \ + "addl $4,%esp\n\t" \ + "cli\n\t" \ + UNBLK_##chip(mask) \ + "decl "SYMBOL_NAME_STR(intr_count)"\n\t" \ + RESTORE_MOST \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(bad_IRQ) #nr "_interrupt:\n\t" \ + SAVE_MOST \ + ACK_##chip(mask) \ + RESTORE_MOST); + +#endif #endif diff --git a/include/asm-i386/locks.h b/include/asm-i386/locks.h new file mode 100644 index 000000000000..eee51dcdabcc --- /dev/null +++ b/include/asm-i386/locks.h @@ -0,0 +1,133 @@ +/* + * SMP locks primitives for building ix86 locks + * (not yet used). + * + * Alan Cox, alan@cymru.net, 1995 + */ + +/* + * This would be much easier but far less clear and easy + * to borrow for other processors if it was just assembler. + */ + +extern __inline__ void prim_spin_lock(struct spinlock *sp) +{ + int processor=smp_processor_id(); + + /* + * Grab the lock bit + */ + + while(lock_set_bit(0,&sp->lock)) + { + /* + * Failed, but thats cos we own it! + */ + + if(sp->cpu==processor) + { + sp->users++; + return 0; + } + /* + * Spin in the cache S state if possible + */ + while(sp->lock) + { + /* + * Wait for any invalidates to go off + */ + + if(smp_invalidate_needed&(1<spins++; + } + /* + * Someone wrote the line, we go 'I' and get + * the cache entry. Now try and regrab + */ + } + sp->users++;sp->cpu=processor; + return 1; +} + +/* + * Release a spin lock + */ + +extern __inline__ int prim_spin_unlock(struct spinlock *sp) +{ + /* This is safe. The decrement is still guarded by the lock. 
A multilock would + not be safe this way */ + if(!--sp->users) + { + lock_clear_bit(0,&sp->lock);sp->cpu= NO_PROC_ID; + return 1; + } + return 0; +} + + +/* + * Non blocking lock grab + */ + +extern __inline__ int prim_spin_lock_nb(struct spinlock *sp) +{ + if(lock_set_bit(0,&sp->lock)) + return 0; /* Locked already */ + sp->users++; + return 1; /* We got the lock */ +} + + +/* + * These wrap the locking primtives up for usage + */ + +extern __inline__ void spinlock(struct spinlock *sp) +{ + if(sp->prioritylock_order) + panic("lock order violation: %s (%d)\n", sp->name, current->lock_order); + if(prim_spin_lock(sp)) + { + /* + * We got a new lock. Update the priority chain + */ + sp->oldpri=current->lock_order; + current->lock_order=sp->priority; + } +} + +extern __inline__ void spinunlock(struct spinlock *sp) +{ + if(current->lock_order!=sp->priority) + panic("lock release order violation %s (%d)\n", sp->name, current->lock_order); + if(prim_spin_unlock(sp)) + { + /* + * Update the debugging lock priority chain. We dumped + * our last right to the lock. + */ + current->lock_order=sp->oldpri; + } +} + +extern __inline__ void spintestlock(struct spinlock *sp) +{ + /* + * We do no sanity checks, its legal to optimistically + * get a lower lock. + */ + prim_spin_lock_nb(sp); +} + +extern __inline__ void spintestunlock(struct spinlock *sp) +{ + /* + * A testlock doesnt update the lock chain so we + * must not update it on free + */ + prim_spin_unlock(sp); +} diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 88e548e8a342..b8000f5f0b50 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -60,6 +60,8 @@ typedef unsigned long pgprot_t; * * ..but the i386 has somewhat limited invalidation capabilities. 
*/ + +#ifndef CONFIG_SMP #define invalidate() \ __asm__ __volatile__("movl %%cr3,%%eax\n\tmovl %%eax,%%cr3": : :"ax") @@ -69,6 +71,14 @@ do { if ((task)->mm == current->mm) invalidate(); } while (0) #define invalidate_page(task,addr) \ do { if ((task)->mm == current->mm) invalidate(); } while (0) +#else +#include +#define local_invalidate() \ +__asm__ __volatile__("movl %%cr3,%%eax\n\tmovl %%eax,%%cr3": : :"ax") +#define invalidate() \ + smp_invalidate(); +#endif + /* Certain architectures need to do special things when pte's * within a page table are directly modified. Thus, the following * hook is made available. diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index edea375e12b7..f107729bba7c 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -1,6 +1,7 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +#ifdef CONFIG_SMP #ifndef ASSEMBLY #include @@ -165,7 +166,7 @@ struct cpuinfo_x86 }; -extern struct cpuinfo_x86 cpu_data[NR_PROCS]; +extern struct cpuinfo_x86 cpu_data[NR_CPUS]; /* * Private routines/data @@ -174,7 +175,7 @@ extern struct cpuinfo_x86 cpu_data[NR_PROCS]; extern void smp_scan_config(unsigned long, unsigned long); extern unsigned long smp_alloc_memory(unsigned long mem_base); extern unsigned char *apic_reg; -extern unsigned char *kernel_stacks[NR_PROCS]; +extern unsigned char *kernel_stacks[NR_CPUS]; extern unsigned char boot_cpu_id; extern unsigned long cpu_present_map; extern void smp_invalidate(void); @@ -227,5 +228,5 @@ extern __inline int smp_processor_id(void) #endif /* !ASSEMBLY */ #define NO_PROC_ID 0xFF /* No processor magic marker */ - +#endif #endif diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index 21a412618b60..b27c24e0e0e1 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -47,9 +47,62 @@ __asm__("str %%ax\n\t" \ * * It also reloads the debug regs if necessary.. */ + + +#ifdef CONFIG_SMP + /* + * Keep the lock depth straight. 
If we switch on an interrupt from + * kernel->user task we need to lose a depth, and if we switch the + * other way we need to gain a depth. Same layer switches come out + * the same. + * + * We spot a switch in user mode because the kernel counter is the + * same as the interrupt counter depth. (We never switch during the + * message/invalidate IPI). + * + * We fsave/fwait so that an exception goes off at the right time + * (as a call from the fsave or fwait in effect) rather than to + * the wrong process. + */ + +#define switch_to(tsk) do { \ + cli();\ + if(current->flags&PF_USEDFPU) \ + { \ + __asm__ __volatile__("fnsave %0":"=m" (current->tss.i387.hard)); \ + __asm__ __volatile__("fwait"); \ + current->flags&=~PF_USEDFPU; \ + } \ + current->lock_depth=syscall_count; \ + kernel_counter+=next->lock_depth-current->lock_depth; \ + syscall_count=next->lock_depth; \ +__asm__("pushl %%edx\n\t" \ + "movl "SYMBOL_NAME_STR(apic_reg)",%%edx\n\t" \ + "movl 0x20(%%edx), %%edx\n\t" \ + "shrl $22,%%edx\n\t" \ + "and $0x3C,%%edx\n\t" \ + "xchgl %%ecx,"SYMBOL_NAME_STR(current_set)"(,%%edx)\n\t" \ + "popl %%edx\n\t" \ + "ljmp %0\n\t" \ + "sti\n\t" \ + : /* no output */ \ + :"m" (*(((char *)&tsk->tss.tr)-4)), \ + "c" (tsk) \ + :"cx"); \ + /* Now maybe reload the debug registers */ \ + if(current->debugreg[7]){ \ + loaddebug(0); \ + loaddebug(1); \ + loaddebug(2); \ + loaddebug(3); \ + loaddebug(6); \ + } \ +} while (0) + +#else #define switch_to(tsk) do { \ __asm__("cli\n\t" \ - "xchgl %%ecx,"SYMBOL_NAME_STR(current)"\n\t" \ + "xchgl %%ecx,"SYMBOL_NAME_STR(current_set)"\n\t" \ "ljmp %0\n\t" \ "sti\n\t" \ "cmpl %%ecx,"SYMBOL_NAME_STR(last_task_used_math)"\n\t" \ @@ -69,6 +122,7 @@ __asm__("cli\n\t" \ loaddebug(6); \ } \ } while (0) +#endif #define _set_base(addr,base) \ __asm__("movw %%dx,%0\n\t" \ diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 7475e66d5280..57c109c69410 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -150,6 
+150,8 @@ #define __NR__newselect 142 #define __NR_flock 143 #define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 /* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ #define _syscall0(type,name) \ diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 1541e0fdcb20..67ae6641b7f6 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -65,6 +65,13 @@ * CDROM IOCTL structures */ +struct cdrom_blk +{ + unsigned from; + unsigned short len; +}; + + struct cdrom_msf { u_char cdmsf_min0; /* start minute */ @@ -285,7 +292,10 @@ struct cdrom_multisession #define CDROMREADCOOKED 0x5315 /* read data in cooked mode */ #define CDROMSEEK 0x5316 /*seek msf address*/ - +/* + * for playing audio in logical block addressing mode + */ +#define CDROMPLAYBLK 0x5317 /* (struct cdrom_blk) */ /* * CD-ROM-specific SCSI command opcodes diff --git a/include/linux/igmp.h b/include/linux/igmp.h index a749dc947eaa..7af40081f09e 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -42,6 +42,7 @@ struct igmphdr #define IGMP_MTRACE_RESP 0x1e #define IGMP_MTRACE 0x1f + /* * Use the BSD names for these for compatibility */ diff --git a/include/linux/mroute.h b/include/linux/mroute.h index a68f44667c3b..8943510c619a 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -1,5 +1,6 @@ #ifndef __LINUX_MROUTE_H #define __LINUX_MROUTE_H + /* * Based on the MROUTING 3.5 defines primarily to keep * source compatibility with BSD. 
@@ -117,19 +118,53 @@ extern int ip_mroute_setsockopt(struct sock *, int, char *, int); extern int ip_mroute_getsockopt(struct sock *, int, char *, int *); extern int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg); extern void mroute_close(struct sock *sk); +extern void ipmr_forward(struct sk_buff *skb, int is_frag); struct vif_device { - struct device *dev; /* Device we are using */ - struct route *rt_cache; /* Tunnel route cache */ - unsigned long bytes_in,bytes_out; - unsigned long pkt_in,pkt_out; /* Statistics */ - unsigned long rate_limit; /* Traffic shaping (NI) */ - unsigned char threshold; /* TTL threshold */ - unsigned short flags; /* Control flags */ - unsigned long local,remote; /* Addresses (remote for tunnels) */ + struct device *dev; /* Device we are using */ + struct route *rt_cache; /* Tunnel route cache */ + unsigned long bytes_in,bytes_out; + unsigned long pkt_in,pkt_out; /* Statistics */ + unsigned long rate_limit; /* Traffic shaping (NI) */ + unsigned char threshold; /* TTL threshold */ + unsigned short flags; /* Control flags */ + unsigned long local,remote; /* Addresses(remote for tunnels)*/ +}; + +struct mfc_cache +{ + struct mfc_cache *next; /* Next entry on cache line */ + __u32 mfc_mcastgrp; /* Group the entry belongs to */ + __u32 mfc_origin; /* Source of packet */ + vifi_t mfc_parent; /* Source interface */ + struct timer_list mfc_timer; /* Expiry timer */ + int mfc_flags; /* Flags on line */ + struct sk_buff_head mfc_unresolved; /* Unresolved buffers */ + int mfc_queuelen; /* Unresolved buffer counter */ + unsigned char mfc_ttls[MAXVIFS]; /* TTL thresholds */ }; +#define MFC_QUEUED 1 +#define MFC_RESOLVED 2 + + +#define MFC_LINES 64 + +#ifdef __BIG_ENDIAN +#define MFC_HASH(a,b) ((((a)>>24)^((b)>>26))&(MFC_LINES-1)) +#else +#define MFC_HASH(a,b) (((a)^((b)>>2))&(MFC_LINES-1)) +#endif + #endif + +/* + * Pseudo messages used by mrouted + */ + +#define IGMPMSG_NOCACHE 1 /* Kernel cache fill request to mrouted */ +#define 
IGMPMSG_WRONGVIF 2 /* For PIM assert processing (unused) */ + #endif diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index ec0e17f274c3..020a338b85ff 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -67,6 +67,8 @@ enum net_directory_inos { PROC_NET_SNMP, PROC_NET_RARP, PROC_NET_IGMP, + PROC_NET_IPMR_VIF, + PROC_NET_IPMR_MFC, PROC_NET_IPFWFWD, PROC_NET_IPFWBLK, PROC_NET_IPACCT, diff --git a/include/linux/sched.h b/include/linux/sched.h index 45e9078b09c9..df82748fe3fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H +#include + /* * define DEBUG if you want the wait-queues to have some extra * debugging code. It's not normally used, but might catch some @@ -21,6 +23,8 @@ extern unsigned long event; #include #include +#include + /* * cloning flags: */ @@ -215,6 +219,10 @@ struct task_struct { struct mm_struct *mm; /* signal handlers */ struct signal_struct *sig; +#ifdef CONFIG_SMP + int processor; + int lock_depth; /* Lock depth. We can context swithc in and out of holding a syscall kernel lock... */ +#endif }; /* @@ -228,6 +236,8 @@ struct task_struct { #define PF_STARTING 0x00000100 /* being created */ #define PF_EXITING 0x00000200 /* getting shut down */ +#define PF_USEDFPU 0x00100000 /* Process used the FPU this quantum (SMP only) */ + /* * Limit the stack by to some sane default: root can always * increase this limit if needed.. 8MB seems reasonable. @@ -275,7 +285,12 @@ extern struct mm_struct init_mm; extern struct task_struct init_task; extern struct task_struct *task[NR_TASKS]; extern struct task_struct *last_task_used_math; -extern struct task_struct *current; +extern struct task_struct *current_set[NR_CPUS]; +/* + * On a single processor system this comes out as current_set[0] when cpp + * has finished with it, which gcc will optimise away. 
+ */ +#define current (current_set[smp_processor_id()]) /* Current on this processor */ extern unsigned long volatile jiffies; extern unsigned long itimer_ticks; extern unsigned long itimer_next; diff --git a/include/linux/smp.h b/include/linux/smp.h index 82cd6801eeb2..0d5dff3c0393 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -6,6 +6,7 @@ * Alan Cox. */ +#ifdef CONFIG_SMP #include @@ -28,4 +29,15 @@ extern volatile int smp_msg_id; #define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPU's when rebooting */ #define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU */ +#else + +/* + * These macros fold the SMP functionality into a single CPU system + */ + +#define smp_num_cpus 1 +#define smp_processor_id() 0 +#define smp_message_pass(t,m,d,w) +#define smp_threads_ready 1 +#endif #endif diff --git a/include/linux/tasks.h b/include/linux/tasks.h index 4926c0e7bb08..e97409921d1e 100644 --- a/include/linux/tasks.h +++ b/include/linux/tasks.h @@ -5,7 +5,11 @@ * This is the maximum nr of tasks - change it if you need to */ -#define NR_PROCS 32 /* Max processors that can be running */ +#ifdef CONFIG_SMP +#define NR_CPUS 32 /* Max processors that can be running in SMP */ +#else +#define NR_CPUS 1 +#endif #define NR_TASKS 512 diff --git a/include/net/netlink.h b/include/net/netlink.h new file mode 100644 index 000000000000..58c811495ae6 --- /dev/null +++ b/include/net/netlink.h @@ -0,0 +1,13 @@ +#define NET_MAJOR 18 /* Major 18 is reserved for networking */ +#define MAX_LINKS 3 /* 18,0 for route updates, 18,1 for SKIP */ +#define MAX_QBYTES 32768 /* Maximum bytes in the queue */ + +extern int netlink_attach(int unit, int (*function)(struct sk_buff *skb)); +extern void netlink_detach(int unit); +extern int netlink_post(int unit, struct sk_buff *skb); +extern void init_netlink(void); + +#define NETLINK_ROUTE 0 /* Routing/device hook */ +#define NETLINK_SKIP 1 /* Reserved for ENskip */ +#define NETLINK_USERSOCK 2 /* Reserved for user mode socket 
protocols */ + diff --git a/init/main.c b/init/main.c index 2aaffc7adc8e..135132809bc7 100644 --- a/init/main.c +++ b/init/main.c @@ -293,11 +293,14 @@ unsigned long loops_per_sec = (1<<12); better than 1% */ #define LPS_PREC 8 -static void calibrate_delay(void) +void calibrate_delay(void) { int ticks; int loopbit; int lps_precision = LPS_PREC; +#ifdef CONFIG_SMP + loops_per_sec = (1<<12); +#endif printk("Calibrating delay loop.. "); while (loops_per_sec <<= 1) { @@ -418,9 +421,76 @@ extern void setup_arch(char **, unsigned long *, unsigned long *); static char init_stack[PAGE_SIZE]; +#ifdef CONFIG_SMP +/* + * Activate a secondary processor. + */ + +asmlinkage void start_secondary(void) +{ + trap_init(); + init_IRQ(); + smp_callin(); + for(;;) + idle(); +} + +/* + * Called by CPU#0 to activate the rest. + */ + +static void smp_init(void) +{ + int i=0; + smp_boot_cpus(); + + /* + * Create the slave init tasks. At this point + * fork will create them all ask task 0 + */ + + for(i=1;iprocessor=i; + } + smp_threads_ready=1; + smp_commence(); +} + +#endif + +/* + * Activate the first processor. + */ + asmlinkage void start_kernel(void) { char * command_line; + +/* + * This little check will move. + */ + +#ifdef CONFIG_SMP + static int first_cpu=1; + + if(!first_cpu) + start_secondary(); + first_cpu=0; + +#endif /* * Interrupts are still disabled. 
Do necessary setups, then * enable them @@ -477,7 +547,9 @@ asmlinkage void start_kernel(void) check_bugs(); printk(linux_banner); - +#ifdef CONFIG_SMP + smp_init(); +#endif /* we count on the clone going ok */ if (!clone(CLONE_VM, init_stack+sizeof(init_stack))) init(); diff --git a/kernel/exit.c b/kernel/exit.c index 8acab682b52f..60721b34a45f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -6,6 +6,7 @@ #undef DEBUG_PROC_TREE +#include #include #include #include @@ -72,7 +73,7 @@ int send_sig(unsigned long sig,struct task_struct * p,int priv) void notify_parent(struct task_struct * tsk) { - if (tsk->p_pptr == task[1]) + if (tsk->p_pptr == task[smp_num_cpus]) /* Init */ tsk->exit_signal = SIGCHLD; send_sig(tsk->exit_signal, tsk->p_pptr, 1); wake_up_interruptible(&tsk->p_pptr->wait_chldexit); @@ -349,8 +350,8 @@ static void forget_original_parent(struct task_struct * father) for_each_task(p) { if (p->p_opptr == father) - if (task[1]) - p->p_opptr = task[1]; + if (task[smp_num_cpus]) /* init */ + p->p_opptr = task[smp_num_cpus]; else p->p_opptr = task[0]; } @@ -457,8 +458,8 @@ static void exit_notify(void) current->p_cptr = p->p_osptr; p->p_ysptr = NULL; p->flags &= ~(PF_PTRACED|PF_TRACESYS); - if (task[1] && task[1] != current) - p->p_pptr = task[1]; + if (task[smp_num_cpus] && task[smp_num_cpus] != current) /* init */ + p->p_pptr = task[smp_num_cpus]; else p->p_pptr = task[0]; p->p_osptr = p->p_pptr->p_cptr; diff --git a/kernel/fork.c b/kernel/fork.c index 04e65dd77d99..f53d5e9cbc96 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -11,6 +11,7 @@ * management can be a bitch. 
See 'mm/mm.c': 'copy_page_tables()' */ +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -40,15 +42,17 @@ static int find_empty_process(void) return -EAGAIN; } repeat: - if ((++last_pid) & 0xffff8000) - last_pid=1; + if(smp_threads_ready) { + if ((++last_pid) & 0xffff8000) + last_pid=1; + } this_user_tasks = 0; for_each_task (p) { if (p->uid == current->uid) this_user_tasks++; - if (p->pid == last_pid || + if (smp_threads_ready && (p->pid == last_pid || p->pgrp == last_pid || - p->session == last_pid) + p->session == last_pid)) goto repeat; } if (this_user_tasks > current->rlim[RLIMIT_NPROC].rlim_cur) @@ -224,6 +228,10 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) p->tty_old_pgrp = 0; p->utime = p->stime = 0; p->cutime = p->cstime = 0; +#ifdef CONFIG_SMP + p->processor = NO_PROC_ID; + p->lock_depth = 1; +#endif p->start_time = jiffies; task[nr] = p; SET_LINKS(p); diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 1378d75c8bfc..2a97fff86bfd 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -273,7 +273,10 @@ struct symbol_table symbol_table = { X(sleep_on), X(interruptible_sleep_on), X(schedule), - X(current), + X(current_set), +#if defined(__i386__) && defined(CONFIG_SMP) + X(apic_reg), /* Needed internally for the I386 inlines */ +#endif X(jiffies), X(xtime), X(loops_per_sec), diff --git a/kernel/sched.c b/kernel/sched.c index 0d7609e95c11..f71cb35d8fdf 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,7 @@ struct task_struct init_task = INIT_TASK; unsigned long volatile jiffies=0; -struct task_struct *current = &init_task; +struct task_struct *current_set[NR_CPUS]; struct task_struct *last_task_used_math = NULL; struct task_struct * task[NR_TASKS] = {&init_task, }; @@ -181,6 +182,12 @@ asmlinkage void schedule(void) struct task_struct * p; struct task_struct * next; unsigned long 
timeout = 0; + +#ifdef CONFIG_SMP_DEBUG + int proc=smp_processor_id(); + if(active_kernel_processor!=proc) + panic("active kernel processor set wrongly! %d not %d\n", active_kernel_processor,proc); +#endif /* check alarm, wake up any interruptible tasks that have got a signal */ @@ -210,6 +217,13 @@ asmlinkage void schedule(void) } p = init_task.next_run; sti(); + +#ifdef CONFIG_SMP + /* + * This is safe as we do not permit re-entry of schedule() + */ + current->processor = NO_PROC_ID; +#endif /* * Note! there may appear new tasks on the run-queue during this, as @@ -220,6 +234,13 @@ asmlinkage void schedule(void) c = -1000; next = &init_task; while (p != &init_task) { +#ifdef CONFIG_SMP + /* We are not permitted to run a task someone else is running */ + if (p->processor != NO_PROC_ID) { + p = p->next_run; + continue; + } +#endif if (p->counter > c) c = p->counter, next = p; p = p->next_run; @@ -230,6 +251,20 @@ asmlinkage void schedule(void) for_each_task(p) p->counter = (p->counter >> 1) + p->priority; } +#ifdef CONFIG_SMP + + /* + * Context switching between two idle threads is pointless. + */ + if(!current->pid && !next->pid) + next=current; + /* + * Allocate process to CPU + */ + + next->processor = smp_processor_id(); + +#endif if (current != next) { struct timer_list timer; @@ -446,6 +481,9 @@ static unsigned long count_active_tasks(void) (*p)->state == TASK_UNINTERRUPTIBLE || (*p)->state == TASK_SWAPPING)) nr += FIXED_1; +#ifdef CONFIG_SMP + nr-=(smp_num_cpus-1)*FIXED_1; +#endif return nr; } @@ -651,8 +689,7 @@ void do_timer(struct pt_regs * regs) } else xtime.tv_usec += tick + time_adjust_step; - if (time_adjust) - { + if (time_adjust) { /* We are doing an adjtime thing. * * Modify the value of the tick for next time. 
@@ -685,7 +722,7 @@ void do_timer(struct pt_regs * regs) calc_load(); if (user_mode(regs)) { current->utime++; - if (current != task[0]) { + if (current->pid) { if (current->priority < 15) kstat.cpu_nice++; else @@ -698,9 +735,9 @@ void do_timer(struct pt_regs * regs) } } else { current->stime++; - if(current != task[0]) + if(current->pid) kstat.cpu_system++; - if (prof_buffer && current != task[0]) { + if (prof_buffer && current->pid) { extern int _stext; unsigned long ip = instruction_pointer(regs); ip -= (unsigned long) &_stext; @@ -727,7 +764,7 @@ void do_timer(struct pt_regs * regs) send_sig(SIGXCPU, current, 1); } - if (current != task[0] && 0 > --current->counter) { + if (current->pid && 0 > --current->counter) { current->counter = 0; need_resched = 1; } @@ -875,6 +912,15 @@ void show_state(void) void sched_init(void) { + /* + * We have to do a little magic to get the first + * process right in SMP mode. + */ + int cpu=smp_processor_id(); + current_set[cpu]=&init_task; +#ifdef CONFIG_SMP + init_task.processor=cpu; +#endif bh_base[TIMER_BH].routine = timer_bh; bh_base[TQUEUE_BH].routine = tqueue_bh; bh_base[IMMEDIATE_BH].routine = immediate_bh; diff --git a/kernel/sys.c b/kernel/sys.c index 5de77041140b..9360cb7fea1a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -173,7 +173,7 @@ void ctrl_alt_del(void) if (C_A_D) hard_reset_now(); else - send_sig(SIGINT,task[1],1); + kill_proc(1, SIGINT, 1); } diff --git a/kernel/time.c b/kernel/time.c index 27dceb91f56e..0ed751f9032e 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -26,6 +26,8 @@ #include #include +#include + /* * The timezone where the local system is located. Used as a default by some * programs who obtain this value by using gettimeofday. 
diff --git a/net/Changes b/net/Changes index 3d5ea8a3a2cb..04858a586868 100644 --- a/net/Changes +++ b/net/Changes @@ -230,6 +230,19 @@ o Error in ip_mr ioctls fixed [Michael Chastain] [IN] o TCP cache zap bugs hopefully fixed [IN] o Length checks in udp/raw sending [Craig Metz] [IN] +-------->>>>> 1.3.31 <<<<<<------- + +o IP_OPTIONS [A.N.Kuznetsov] [IN] +o TCP cache zap more fixes [IN] +o Most of the IP multicast routing cache added [IN] +o Kernel/user communication module (not used yet) [IN] + +-------->>>>> 1.3.31 <<<<<<------- + +o IFF_ALLMULTI support for 3c501,3c509,8390 and + tulip(SMC etherpower) boards [IN] + + ---------- Things I thought Linus had for a while and not merged ---------------- o Paul Gortmakers 8390 Copy and checksum [Pending] @@ -240,19 +253,15 @@ o Paul Gortmakers 8390 Copy and checksum [Pending] ---------- Things pending for me to merge -------------- o IPFW support for TOS changing (Al Longyear) -o /dev/skip /dev/ipah etc - Kernel/Usermode communications module (me) o AF_UNIX garbage collect code o Faster closedown option for heavy use sites (me) o Tom May's insw_and_checksum() - - ---------------- Tbings That Need Doing Before 1.4 ------------------ +--------------- Things That Need Doing Before 1.4 ------------------ o inet_error for other layers o Finish merging the bridge code o SIOCSLEEPRT patch -o Options support in ip_build_xmit [PENDING] o Fast checksum/copy on outgoing TCP o Fast dev_grab_next() transmit reload function and dev_push_failed() ?? @@ -261,10 +270,8 @@ o L2 ip routing cache [PENDING(btv)] o Forwarding queue control (+ fairness algorithms ??) o IP forward flow control. o Infinite PPP devices. -o AX.25 set protocol type o Clean up RAW AX.25 sockets. 
o Finish 802.2 Class I code to be compliant to the oddities of 802.2 -o Full variable length AX.25 support [JSN doing] o Tidy BPQ support to use a bpqip tunnel device o Strange eth0-eth3 bug o Finish IPIP bug fixes [Done hopefully] @@ -276,9 +283,7 @@ o Throw out existing firewall ioctl()'s and use a single table load. 0.2 --- -o New icmp.c. [IN] o Better TCP window handling [Pedro Roque] -o IP option support. o Add tty support to sonix driver. o PPP for Sonix ISDN. o Loadable firewall extensions. diff --git a/net/Makefile b/net/Makefile index fc4dc4080d47..1d02cafccada 100644 --- a/net/Makefile +++ b/net/Makefile @@ -11,6 +11,6 @@ MOD_SUB_DIRS := ipv4 ALL_SUB_DIRS := 802 ax25 core ethernet ipv4 ipx unix appletalk netrom SUB_DIRS := $(ALL_SUB_DIRS) L_TARGET := network.a -L_OBJS := socket.o protocols.o $(join $(SUB_DIRS),$(SUB_DIRS:%=/%.o)) +L_OBJS := socket.o protocols.o netlink.o $(join $(SUB_DIRS),$(SUB_DIRS:%=/%.o)) include $(TOPDIR)/Rules.make diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 6e0216363e78..20ceb9eecba5 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -17,8 +17,10 @@ M_OBJS := ifeq ($(CONFIG_INET_RARP),y) IPV4_OBJS += rarp.o -elifeq ($(CONFIG_INET_RARP),m) -M_OBJS += rarp.o +else + ifeq ($(CONFIG_INET_RARP),m) + M_OBJS += rarp.o + endif endif ifeq ($(CONFIG_NET_IPIP),y) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 80272aeb0a4c..d5af3f7d261f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -196,31 +196,29 @@ static void icmp_out_count(int type) static void icmp_glue_bits(const void *p, __u32 saddr, char *to, unsigned int offset, unsigned int fraglen) { - struct icmp_bxm *icmp_param=(struct icmp_bxm *)p; + struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; struct icmphdr *icmph; - if(offset) + unsigned long csum; + + if (offset) { icmp_param->csum=csum_partial_copy(icmp_param->data_ptr+offset-sizeof(struct icmphdr), to, fraglen,icmp_param->csum); - else - { -#ifdef CSUM_FOLD_WORKS - /* - * Need this fixed to make 
multifragment ICMP's work again. - */ - icmp_param->csum=csum_partial_copy((void *)&icmp_param->icmph, to, sizeof(struct icmphdr), - icmp_param->csum); - icmp_param->csum=csum_partial_copy(icmp_param->data_ptr, to+sizeof(struct icmphdr), - fraglen-sizeof(struct icmphdr), icmp_param->csum); - icmph=(struct icmphdr *)to; - icmph->checksum = csum_fold(icmp_param->csum); -#else - memcpy(to, &icmp_param->icmph, sizeof(struct icmphdr)); - memcpy(to+sizeof(struct icmphdr), icmp_param->data_ptr, fraglen-sizeof(struct icmphdr)); - icmph=(struct icmphdr *)to; - icmph->checksum=ip_compute_csum(to, fraglen); -#endif - + return; } + + /* + * First fragment includes header. Note that we've done + * the other fragments first, so that we get the checksum + * for the whole packet here. + */ + csum = csum_partial_copy((void *)&icmp_param->icmph, + to, sizeof(struct icmphdr), + icmp_param->csum); + csum = csum_partial_copy(icmp_param->data_ptr, + to+sizeof(struct icmphdr), + fraglen-sizeof(struct icmphdr), csum); + icmph=(struct icmphdr *)to; + icmph->checksum = csum_fold(csum); } /* @@ -231,6 +229,7 @@ static void icmp_build_xmit(struct icmp_bxm *icmp_param, __u32 saddr, __u32 dadd { struct sock *sk=icmp_socket.data; icmp_param->icmph.checksum=0; + icmp_param->csum=0; icmp_out_count(icmp_param->icmph.type); ip_build_xmit(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+sizeof(struct icmphdr), @@ -698,10 +697,12 @@ static struct icmp_control icmp_pointers[19] = { void icmp_init(struct proto_ops *ops) { struct sock *sk; + int err; icmp_socket.type=SOCK_RAW; icmp_socket.ops=ops; - if(ops->create(&icmp_socket, IPPROTO_ICMP)<0) - panic("Failed to create the ICMP control socket.\n"); + if((err=ops->create(&icmp_socket, IPPROTO_ICMP))<0) + panic("Failed to create the ICMP control socket (%d,%d,%p,%p).\n", -err, + current->euid, current, &init_task); sk=icmp_socket.data; sk->allocation=GFP_ATOMIC; sk->num = 256; /* Don't receive any data */ diff --git a/net/ipv4/ip.c b/net/ipv4/ip.c 
index 2921e31c9279..e0d196358de7 100644 --- a/net/ipv4/ip.c +++ b/net/ipv4/ip.c @@ -93,6 +93,8 @@ * Werner Almesberger : Zero fragment bug * Alan Cox : RAW IP frame length bug * Alan Cox : Outgoing firewall on build_xmit + * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel + * Alan Cox : Multicast routing hooks * * * @@ -1466,6 +1468,50 @@ static void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev #ifdef CONFIG_IP_FORWARD +#ifdef CONFIG_IP_MROUTE + +/* + * Encapsulate a packet by attaching a valid IPIP header to it. + * This avoids tunnel drivers and other mess and gives us the speed so + * important for multicast video. + */ + +static void ip_encap(struct sk_buff *skb, int len, struct device *out, __u32 daddr) +{ + /* + * There is space for the IPIP header and MAC left. + * + * Firstly push down and install the IPIP header. + */ + struct iphdr *iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr)); + if(len>65515) + len=65515; + iph->version = 4; + iph->tos = skb->ip_hdr->tos; + iph->ttl = skb->ip_hdr->ttl; + iph->frag_off = 0; + iph->daddr = daddr; + iph->saddr = out->pa_addr; + iph->protocol = IPPROTO_IPIP; + iph->ihl = 5; + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + skb->dev = out; + skb->arp = 1; + skb->raddr=daddr; + /* + * Now add the physical header (driver will push it down). + */ + if (out->hard_header && out->hard_header(skb, out, ETH_P_IP, NULL, NULL, len)<0) + skb->arp=0; + /* + * Read to queue for transmission. + */ +} + +#endif /* * Forward an IP datagram to its next destination. @@ -1485,7 +1531,8 @@ int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, int fw_res = 0; /* Forwarding result */ #ifdef CONFIG_IP_MASQUERADE struct sk_buff *skb_in = skb; /* So we can remember if the masquerader did some swaps */ -#endif +#endif + int encap = 0; /* Encap length */ /* * See if we are allowed to forward this. 
@@ -1542,83 +1589,86 @@ int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, return -1; } - /* - * OK, the packet is still valid. Fetch its destination address, - * and give it to the IP sender for further processing. - */ - - rt = ip_rt_route(target_addr, NULL, NULL); - if (rt == NULL) - { - /* - * Tell the sender its packet cannot be delivered. Again - * ICMP is screened later. - */ - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev); - return -1; - } - - - /* - * Gosh. Not only is the packet valid; we even know how to - * forward it onto its final destination. Can we say this - * is being plain lucky? - * If the router told us that there is no GW, use the dest. - * IP address itself- we seem to be connected directly... - */ - - raddr = rt->rt_gateway; - - if (raddr != 0) +#ifdef CONFIG_IP_MROUTE + if(!(is_frag&8)) { +#endif /* - * Strict routing permits no gatewaying + * OK, the packet is still valid. Fetch its destination address, + * and give it to the IP sender for further processing. */ - if (opt->is_strictroute) + rt = ip_rt_route(target_addr, NULL, NULL); + if (rt == NULL) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev); + /* + * Tell the sender its packet cannot be delivered. Again + * ICMP is screened later. + */ + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev); return -1; } + /* - * There is a gateway so find the correct route for it. - * Gateways cannot in turn be gatewayed. + * Gosh. Not only is the packet valid; we even know how to + * forward it onto its final destination. Can we say this + * is being plain lucky? + * If the router told us that there is no GW, use the dest. + * IP address itself- we seem to be connected directly... */ -#if 0 - rt = ip_rt_route(raddr, NULL, NULL); - if (rt == NULL) + raddr = rt->rt_gateway; + + if (raddr != 0) { /* - * Tell the sender its packet cannot be delivered... 
+ * Strict routing permits no gatewaying + */ + + if (opt->is_strictroute) + { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev); + return -1; + } + + /* + * There is a gateway so find the correct route for it. + * Gateways cannot in turn be gatewayed. */ - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev); - return -1; } - if (rt->rt_gateway != 0) - raddr = rt->rt_gateway; + else + raddr = target_addr; + + /* + * Having picked a route we can now send the frame out. + */ + + dev2 = rt->rt_dev; + /* + * In IP you never have to forward a frame on the interface that it + * arrived upon. We now generate an ICMP HOST REDIRECT giving the route + * we calculated. + */ +#ifndef CONFIG_IP_NO_ICMP_REDIRECT + if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) && + (rt->rt_flags&RTF_MODIFIED) && !opt->srr) + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev); #endif +#ifdef CONFIG_IP_MROUTE } else - raddr = target_addr; - - /* - * Having picked a route we can now send the frame out. - */ - - dev2 = rt->rt_dev; + { + /* + * Multicast route forward. Routing is already done + */ + dev2=skb->dev; + raddr=skb->raddr; + if(is_frag&16) /* VIFF_TUNNEL mode */ + encap=20; + } +#endif - /* - * In IP you never have to forward a frame on the interface that it - * arrived upon. We now generate an ICMP HOST REDIRECT giving the route - * we calculated. - */ -#ifndef CONFIG_IP_NO_ICMP_REDIRECT - if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) && - (rt->rt_flags&RTF_MODIFIED) && !opt->srr) - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev); -#endif /* * We now may allocate a new buffer, and copy the datagram into it. 
@@ -1637,17 +1687,21 @@ int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, #endif IS_SKB(skb); - if (skb->len > dev2->mtu && (ntohs(iph->frag_off) & IP_DF)) { + if (skb->len+encap > dev2->mtu && (ntohs(iph->frag_off) & IP_DF)) { ip_statistics.IpFragFails++; icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev2->mtu, dev); return -1; } +#ifdef CONFIG_IP_MROUTE + if(skb_headroom(skb)-encaphard_header_len) + { + skb2 = alloc_skb(dev2->hard_header_len + skb->len + encap + 15, GFP_ATOMIC); +#else if(skb_headroom(skb)hard_header_len) { skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC); - IS_SKB(skb2); - +#endif /* * This is rare and since IP is tolerant of network failures * quite harmless. @@ -1659,11 +1713,19 @@ int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, return -1; } + IS_SKB(skb2); /* * Add the physical headers. */ - - ip_send(skb2,raddr,skb->len,dev2,dev2->pa_addr); +#ifdef CONFIG_IP_MROUTE + if(is_frag&16) + { + skb_reserve(skb,(encap+dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */ + ip_encap(skb2,skb->len, dev2, raddr); + } + else +#endif + ip_send(skb2,raddr,skb->len,dev2,dev2->pa_addr); /* * We have to copy the bytes over as the new header wouldn't fit @@ -1689,13 +1751,22 @@ int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag, skb2 = skb; skb2->dev=dev2; - skb->arp=1; - skb->raddr=raddr; - if(dev2->hard_header) +#ifdef CONFIG_IP_MROUTE + if(is_frag&16) + ip_encap(skb,skb->len, dev2, raddr); + else { - if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0) - skb->arp=0; - } +#endif + skb->arp=1; + skb->raddr=raddr; + if(dev2->hard_header) + { + if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0) + skb->arp=0; + } +#ifdef CONFIG_IP_MROUTE + } +#endif ip_statistics.IpForwDatagrams++; } @@ -1825,6 +1896,9 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) #ifdef CONFIG_IP_FIREWALL int err; #endif +#ifdef 
CONFIG_IP_MROUTE + int mroute_pkt=0; +#endif #ifdef CONFIG_NET_IPV6 /* @@ -1944,9 +2018,11 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) for ( srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4 - ) { + ) + { int brd2; - if (srrptr + 3 > srrspace) { + if (srrptr + 3 > srrspace) + { icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2, skb->dev); kfree_skb(skb, FREE_WRITE); @@ -1954,21 +2030,26 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) } memcpy(&nexthop, &optptr[srrptr-1], 4); if ((brd2 = ip_chk_addr(nexthop)) == 0) - break; - if (brd2 != IS_MYADDR) { -/* ANK: should we implement weak tunneling of multicasts? - * Are they obsolete? DVMRP specs (RFC-1075) is old enough... - */ + break; + if (brd2 != IS_MYADDR) + { + + /* + * ANK: should we implement weak tunneling of multicasts? + * Are they obsolete? DVMRP specs (RFC-1075) is old enough... + * [They are obsolete] + */ kfree_skb(skb, FREE_WRITE); return -EINVAL; } } - if (srrptr <= srrspace) { + if (srrptr <= srrspace) + { opt->srr_is_hit = 1; opt->is_changed = 1; #ifdef CONFIG_IP_FORWARD if (ip_forward(skb, dev, is_frag, nexthop)) - kfree_skb(skb, FREE_WRITE); + kfree_skb(skb, FREE_WRITE); #else ip_statistics.IpInAddrErrors++; kfree_skb(skb, FREE_WRITE); @@ -2041,6 +2122,15 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) skb->ip_hdr = iph; skb->h.raw += iph->ihl*4; +#ifdef CONFIG_IP_MROUTE + /* + * Check the state on multicast routing (multicast and not 224.0.0.z) + */ + + if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000)) + mroute_pkt=1; + +#endif /* * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies. 
* @@ -2102,7 +2192,11 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) * raw delivery wait for that */ +#ifdef CONFIG_IP_MROUTE + if (ipprot->copy || raw_sk || mroute_pkt) +#else if (ipprot->copy || raw_sk) +#endif { skb2 = skb_clone(skb, GFP_ATOMIC); if(skb2==NULL) @@ -2132,6 +2226,30 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) * ICMP reply messages get queued up for transmission...) */ +#ifdef CONFIG_IP_MROUTE + /* + * Forward the last copy to the multicast router. If + * there is a pending raw deliery however make a copy + * and forward that. + */ + + if(mroute_pkt) + { + flag=1; + if(raw_sk==NULL) + ipmr_forward(skb, is_frag); + else + { + struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); + if(skb2) + { + skb2->free=1; + ipmr_forward(skb2, is_frag); + } + } + } +#endif + if(raw_sk!=NULL) /* Shift to last raw user */ raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr); else if (!flag) /* Free and report errors */ @@ -2145,7 +2263,7 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) } /* - * Do any IP forwarding required. + * Do any unicast IP forwarding required. */ /* @@ -2266,14 +2384,7 @@ void ip_queue_xmit(struct sock *sk, struct device *dev, * header length problem */ -#if 0 - ptr = skb->data; - ptr += dev->hard_header_len; - iph = (struct iphdr *)ptr; - skb->ip_hdr = iph; -#else iph = skb->ip_hdr; -#endif iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data)); #ifdef CONFIG_IP_FIREWALL diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c index c1763ae60483..da5aa4aabdde 100644 --- a/net/ipv4/ip_fw.c +++ b/net/ipv4/ip_fw.c @@ -40,6 +40,7 @@ * Alan Cox : Fixed an error in the merge. * Thomas Quinot : Fixed port spoofing. * Alan Cox : Cleaned up retransmits in spoofing. + * Alan Cox : Cleaned up length setting. * * All the real work was done by ..... 
* @@ -266,7 +267,7 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, struct ip_fw *chain, int pol case IPPROTO_TCP: dprintf1("TCP "); /* ports stay 0 if it is not the first fragment */ - if (offset!=0) { + if (!offset) { src_port=ntohs(tcp->source); dst_port=ntohs(tcp->dest); if(tcp->ack) @@ -281,7 +282,7 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, struct ip_fw *chain, int pol case IPPROTO_UDP: dprintf1("UDP "); /* ports stay 0 if it is not the first fragment */ - if (offset!=0) { + if (!offset) { src_port=ntohs(udp->source); dst_port=ntohs(udp->dest); } @@ -700,6 +701,12 @@ static struct sk_buff *revamp(struct sk_buff *skb, struct device *dev, struct ip /* skb2->h.raw = &skb2->data[skb->h.raw - skb->data];*/ skb2->h.raw = skb2->data + (skb->h.raw - skb->data); iph=skb2->h.iph; + /* + * Mend the IP header too + */ + iph->tot_len = htons(diff+ntohs(iph->tot_len)); + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); /* * Copy the packet data into the new buffer. diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2b5f125f72a4..24fe9b96bce9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -12,10 +12,16 @@ * * Fixes: * Michael Chastain : Incorrect size of copying. - * + * Alan Cox : Added the cache manager code * * Status: - * Tree building works. Cache manager to be added next. + * Cache manager under test. 
Forwarding in vague test mode + * Todo: + * Flow control + * Tunnels + * Wipe cache on mrouted exit + * Debug cache ttl handling properly + * Resolve IFF_ALLMULTI for most cards */ #include @@ -27,10 +33,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -47,9 +55,12 @@ * Multicast router conrol variables */ -static struct vif_device vif_table[MAXVIFS]; -static unsigned long vifc_map; -int mroute_do_pim = 0; +static struct vif_device vif_table[MAXVIFS]; /* Devices */ +static unsigned long vifc_map; /* Active device map */ +int mroute_do_pim = 0; /* Set in PIM assert */ +static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ +static struct mfc_cache *cache_resolve_queue; /* Unresolved cache */ +int cache_resolve_queue_len = 0; /* Size of unresolved */ /* * Delete a VIF entry @@ -64,6 +75,342 @@ static void vif_delete(struct vif_device *v) } v->dev=NULL; } + +/* + * Find a vif + */ + +static int ipmr_vifi_find(struct device *dev) +{ + struct vif_device *v=&vif_table[0]; + int ct; + for(ct=0;ctdev==dev) + return ct; + } + return -1; +} + +/* + * Delete a multicast route cache entry + */ + +static void ipmr_cache_delete(struct mfc_cache *cache) +{ + struct sk_buff *skb; + int line; + struct mfc_cache **cp; + + /* + * Find the right cache line + */ + + if(cache->mfc_flags&MFC_QUEUED) + { + cp=&cache_resolve_queue; + del_timer(&cache->mfc_timer); + } + else + { + line=MFC_HASH(cache->mfc_mcastgrp,cache->mfc_origin); + cp=&(mfc_cache_array[line]); + } + + /* + * Unlink the buffer + */ + + while(*cp!=NULL) + { + if(*cp==cache) + { + *cp=cache->next; + break; + } + cp=&((*cp)->next); + } + + /* + * Free the buffer. If it is a pending resolution + * clean up the other resources. 
+ */ + + if(cache->mfc_flags&MFC_QUEUED) + { + cache_resolve_queue_len--; + while((skb=skb_dequeue(&cache->mfc_unresolved))) + kfree_skb(skb, FREE_WRITE); + } + kfree_s(cache,sizeof(cache)); +} + +/* + * Cache expiry timer + */ + +static void ipmr_cache_timer(unsigned long data) +{ + struct mfc_cache *cache=(struct mfc_cache *)data; + ipmr_cache_delete(cache); +} + +/* + * Insert a multicast cache entry + */ + +static void ipmr_cache_insert(struct mfc_cache *c) +{ + int line=MFC_HASH(c->mfc_mcastgrp,c->mfc_origin); + c->next=mfc_cache_array[line]; + mfc_cache_array[line]=c; +} + +/* + * Find a multicast cache entry + */ + +struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) +{ + int line=MFC_HASH(mcastgrp,origin); + struct mfc_cache *cache; + cache=mfc_cache_array[line]; + while(cache!=NULL) + { + if(cache->mfc_origin==origin && cache->mfc_mcastgrp==mcastgrp) + return cache; + cache=cache->next; + } + cache=cache_resolve_queue; + while(cache!=NULL) + { + if(cache->mfc_origin==origin && cache->mfc_mcastgrp==mcastgrp) + return cache; + cache=cache->next; + } + return NULL; +} + +/* + * Allocate a multicast cache entry + */ + +static struct mfc_cache *ipmr_cache_alloc(int priority) +{ + struct mfc_cache *c=(struct mfc_cache *)kmalloc(sizeof(struct mfc_cache), priority); + if(c==NULL) + return NULL; + c->mfc_queuelen=0; + skb_queue_head_init(&c->mfc_unresolved); + init_timer(&c->mfc_timer); + c->mfc_timer.data=(long)c; + c->mfc_timer.function=ipmr_cache_timer; + return c; +} + +/* + * A cache entry has gone into a resolved state from queued + */ + +static void ipmr_cache_resolve(struct mfc_cache *cache) +{ + struct mfc_cache **p; + struct sk_buff *skb; + /* + * Kill the queue entry timer. 
+ */ + del_timer(&cache->mfc_timer); + cache->mfc_flags&=~MFC_QUEUED; + /* + * Remove from the resolve queue + */ + p=&cache_resolve_queue; + while((*p)!=NULL) + { + if((*p)==cache) + { + *p=cache->next; + break; + } + p=&((*p)->next); + } + cache_resolve_queue_len--; + sti(); + /* + * Insert into the main cache + */ + ipmr_cache_insert(cache); + /* + * Play the pending entries through our router + */ + while((skb=skb_dequeue(&cache->mfc_unresolved))) + ipmr_forward(skb, skb->protocol); +} + +/* + * Bounce a cache query up to mrouted. We could use netlink for this but mrouted + * expects the following bizarre scheme.. + */ + +static void ipmr_cache_report(struct sk_buff *pkt) +{ + struct sk_buff *skb=alloc_skb(128, GFP_ATOMIC); + int ihl=pkt->ip_hdr->ihl<<2; + struct igmphdr *igmp; + if(!skb) + return; + + skb->free=1; + + /* + * Copy the IP header + */ + + skb->ip_hdr=(struct iphdr *)skb_put(skb,ihl); + skb->h.iph=skb->ip_hdr; + memcpy(skb->data,pkt->data,ihl); + skb->ip_hdr->protocol = 0; /* Flag to the kernel this is a route add */ + + /* + * Add our header + */ + + igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); + igmp->type = IGMPMSG_NOCACHE; /* non IGMP dummy message */ + igmp->code = 0; + skb->ip_hdr->tot_len=htons(skb->len); /* Fix the length */ + + /* + * Deliver to mrouted + */ + if(sock_queue_rcv_skb(mroute_socket,skb)<0) + { + skb->sk=NULL; + kfree_skb(skb, FREE_READ); + } +} + + +/* + * Queue a packet for resolution + */ + +static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb, int is_frag) +{ + if(cache==NULL) + { + /* + * Create a new entry if allowable + */ + if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL) + { + kfree_skb(skb, FREE_WRITE); + return; + } + /* + * Fill in the new cache entry + */ + cache->mfc_parent=vifi; + cache->mfc_origin=skb->ip_hdr->saddr; + cache->mfc_mcastgrp=skb->ip_hdr->daddr; + cache->mfc_flags=MFC_QUEUED; + /* + * Link to the unresolved list + 
*/ + cache->next=cache_resolve_queue; + cache_resolve_queue=cache; + cache_resolve_queue_len++; + /* + * Fire off the expiry timer + */ + cache->mfc_timer.expires=jiffies+10*HZ; + add_timer(&cache->mfc_timer); + /* + * Reflect first query at mrouted. + */ + if(mroute_socket) + ipmr_cache_report(skb); + } + /* + * See if we can append the packet + */ + if(cache->mfc_queuelen>3) + { + kfree_skb(skb, FREE_WRITE); + return; + } + /* + * Add to our 'pending' list. Cache the is_frag data + * in skb->protocol now it is spare. + */ + cache->mfc_queuelen++; + skb->protocol=is_frag; + skb_queue_tail(&cache->mfc_unresolved,skb); +} + +/* + * MFC cache manipulation by user space mroute daemon + */ + +int ipmr_mfc_modify(int action, struct mfcctl *mfc) +{ + struct mfc_cache *cache; + if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) + return -EINVAL; + /* + * Find the cache line + */ + + cli(); + cache=ipmr_cache_find(mfc->mfcc_origin.s_addr,mfc->mfcc_mcastgrp.s_addr); + + /* + * Delete an entry + */ + if(action==MRT_DEL_MFC) + { + if(cache) + { + ipmr_cache_delete(cache); + sti(); + return 0; + } + return -ENOENT; + } + if(cache) + { + /* + * Update the cache, see if it frees a pending queue + */ + + cache->mfc_flags|=MFC_RESOLVED; + memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls)); + + /* + * Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. + */ + + if(cache->mfc_flags&MFC_QUEUED) + ipmr_cache_resolve(cache); /* Unhook & send the frames */ + sti(); + return 0; + } + /* + * Unsolicited update - thats ok add anyway. 
+ */ + sti(); + cache=ipmr_cache_alloc(GFP_KERNEL); + if(cache==NULL) + return -ENOMEM; + cache->mfc_flags=MFC_RESOLVED; + cache->mfc_origin=mfc->mfcc_origin.s_addr; + cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; + cache->mfc_parent=mfc->mfcc_parent; + memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls)); + ipmr_cache_insert(cache); + return 0; +} /* * Socket options and virtual interface manipulation. The whole @@ -76,6 +423,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) { int err; struct vifctl vif; + struct mfcctl mfc; if(optname!=MRT_INIT) { @@ -184,7 +532,11 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) */ case MRT_ADD_MFC: case MRT_DEL_MFC: - return -EOPNOTSUPP; + err=verify_area(VERIFY_READ, optval, sizeof(mfc)); + if(err) + return err; + memcpy_fromfs(&mfc,optval, sizeof(mfc)); + return ipmr_mfc_modify(optname, &mfc); /* * Control PIM assert. */ @@ -301,6 +653,15 @@ void mroute_close(struct sock *sk) v++; } vifc_map=0; + /* + * Wipe the cache + */ + for(i=0;iraddr; + if(vif->flags&VIFF_TUNNEL) + { + tunnel=16; + raddr=vif->remote; + } + vif->pkt_out++; + vif->bytes_out+=skb->len; + skb->dev=vif->dev; + skb->raddr=skb->h.iph->daddr; + if(ip_forward(skb, in_dev, frag|8|tunnel, raddr)==-1) + kfree_skb(skb, FREE_WRITE); +} + +/* + * Multicast packets for forwarding arrive here + */ + +void ipmr_forward(struct sk_buff *skb, int is_frag) +{ + struct mfc_cache *cache; + struct sk_buff *skb2; + int psend = -1; + int vif=ipmr_vifi_find(skb->dev); + if(vif==-1) + { + kfree_skb(skb, FREE_WRITE); + return; + } + + vif_table[vif].pkt_in++; + vif_table[vif].bytes_in+=skb->len; + cache=ipmr_cache_find(skb->ip_hdr->saddr,skb->ip_hdr->daddr); + + /* + * No usable cache entry + */ + + if(cache==NULL || (cache->mfc_flags&MFC_QUEUED)) + ipmr_cache_unresolved(cache,vif,skb, is_frag); + else + { + /* + * Forward the frame + */ + int ct=0; + while(ctip_hdr->ttl > cache->mfc_ttls[ct] && 
cache->mfc_ttls[ct]>0) + { + if(psend!=-1) + { + skb2=skb_clone(skb, GFP_ATOMIC); + if(skb2) + { + skb2->free=1; + ipmr_queue_xmit(skb2, &vif_table[psend], skb->dev, is_frag); + } + } + psend=ct; + } + ct++; + } + if(psend==-1) + kfree_skb(skb, FREE_WRITE); + else + { + ipmr_queue_xmit(skb, &vif_table[psend], skb->dev, is_frag); + } + /* + * Adjust the stats + */ + } +} +/* + * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif + */ + +int ipmr_vif_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + struct vif_device *vif; + int len=0; + off_t pos=0; + off_t begin=0; + int size; + int ct; + + len += sprintf(buffer, + "Interface Bytes In Pkts In Bytes Out Pkts Out Flags Local Remote\n"); + pos=len; + + for (ct=0;ctdev==NULL) + continue; + size = sprintf(buffer+len, "%-10s %8ld %7ld %8ld %7ld %05X %08lX %08lX\n", + vif->dev->name,vif->bytes_in, vif->pkt_in, vif->bytes_out,vif->pkt_out, + vif->flags, vif->local, vif->remote); + len+=size; + pos+=size; + if(posoffset+length) + break; + } + + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + struct mfc_cache *mfc; + int len=0; + off_t pos=0; + off_t begin=0; + int size; + int ct; + + len += sprintf(buffer, + "Group Origin SrcIface \n"); + pos=len; + + for (ct=0;ctmfc_parent)) + name=vif_table[mfc->mfc_parent].dev->name; + /* + * Interface forwarding map + */ + for(n=0;nmfc_ttls[ct]) + vifmap[n]='X'; + else + vifmap[n]='-'; + vifmap[n]=0; + /* + * Now print it out + */ + size = sprintf(buffer+len, "%08lX %08lX %-8s %s\n", + (unsigned long)mfc->mfc_mcastgrp, + (unsigned long)mfc->mfc_origin, + name, + vifmap); + len+=size; + pos+=size; + if(posoffset+length) + { + sti(); + goto done; + } + mfc=mfc->next; + } + sti(); + } +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/* 
+ * Setup for IP multicast routing + */ + void ip_mr_init(void) { - printk("Linux IP multicast router 0.00pre-working 8)\n"); + printk("Linux IP multicast router 0.02pre-working 8)\n"); register_netdevice_notifier(&ip_mr_notifier); + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_IPMR_VIF, 9 ,"ip_mr_vif", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + ipmr_vif_info + }); + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_IPMR_MFC, 11 ,"ip_mr_cache", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + ipmr_mfc_info + }); } #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 094ab8debb06..843f99f92bbe 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -509,6 +509,8 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) sk->state=state; if(state==TCP_ESTABLISHED) tcp_statistics.TcpCurrEstab++; + if(sk->state==TCP_CLOSE) + tcp_cache_zap(); } /* diff --git a/net/netlink.c b/net/netlink.c new file mode 100644 index 000000000000..b61c5b1f96a8 --- /dev/null +++ b/net/netlink.c @@ -0,0 +1,250 @@ +/* + * SKIPLINK An implementation of a loadable kernel mode driver providing + * multiple kernel/user space bidirectional communications links. + * + * Author: Alan Cox + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include + +#if defined(CONFIG_NETLINK) || defined(MODULE) +#ifdef MODULE +#include +#include +#else +#define MOD_INC_USE_COUNT +#define MOD_DEC_USE_COUNT +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static int (*netlink_handler[MAX_LINKS])(struct sk_buff *skb); +static struct sk_buff_head skb_queue_rd[MAX_LINKS]; +static int rdq_size[MAX_LINKS]; +static struct wait_queue *read_space_wait[MAX_LINKS]; + +static int active_map = 0; +static int open_map = 0; + +/* + * Device operations + */ + +/* + * Default write handler. + */ + +static int netlink_err(struct sk_buff *skb) +{ + kfree_skb(skb, FREE_READ); + return -EUNATCH; +} + +/* + * Write a message to the kernel side of a communication link + */ + +static int netlink_write(struct inode * inode, struct file * file, const char * buf, int count) +{ + unsigned int minor = MINOR(inode->i_rdev); + struct sk_buff *skb; + skb=alloc_skb(count, GFP_KERNEL); + memcpy_fromfs(skb_put(skb,count),buf, count); + return (netlink_handler[minor])(skb); +} + +/* + * Read a message from the kernel side of the communication link + */ + +static int netlink_read(struct inode * inode, struct file * file, char * buf, int count) +{ + unsigned int minor = MINOR(inode->i_rdev); + struct sk_buff *skb; + cli(); + while((skb=skb_dequeue(&skb_queue_rd[minor]))==NULL) + { + if(file->f_flags&O_NONBLOCK) + { + sti(); + return -EWOULDBLOCK; + } + interruptible_sleep_on(&read_space_wait[minor]); + if(current->signal & ~current->blocked) + { + sti(); + return -ERESTARTSYS; + } + } + rdq_size[minor]-=skb->len; + sti(); + if(skb->lenlen; + memcpy_tofs(buf,skb->data,count); + kfree_skb(skb, FREE_READ); + return count; +} + +static int netlink_lseek(struct inode * inode, struct file * file, + off_t offset, int origin) +{ + return -ESPIPE; +} + +static int netlink_open(struct inode * inode, struct file * file) +{ + unsigned int minor = 
MINOR(inode->i_rdev); + + if(minor>=MAX_LINKS) + return -ENODEV; + if(open_map&(1<i_rdev); + open_map&=~(1<i_rdev); + int retval = 0; + + if (minor >= MAX_LINKS) + return -ENODEV; + switch ( cmd ) { + default: + retval = -EINVAL; + } + return retval; +} + + +static struct file_operations netlink_fops = { + netlink_lseek, + netlink_read, + netlink_write, + NULL, /* netlink_readdir */ + NULL, /* netlink_select */ + netlink_ioctl, + NULL, /* netlink_mmap */ + netlink_open, + netlink_release +}; + +/* + * We export these functions to other modules. They provide a + * complete set of kernel non-blocking support for message + * queueing. + */ + +int netlink_attach(int unit, int (*function)(struct sk_buff *skb)) +{ + if(unit>=MAX_LINKS) + return -ENODEV; + if(active_map&(1<len>MAX_QBYTES) + ret=-EWOULDBLOCK; + else + { + skb_queue_tail(&skb_queue_rd[unit], skb); + rdq_size[unit]+=skb->len; + ret=0; + wake_up_interruptible(&read_space_wait[MAX_LINKS]); + } + restore_flags(flags); + return ret; +} + + +#ifdef MODULE +char kernel_version[]=UTS_RELEASE; + +int init_module(void) +{ + int ct; + printk("Network Kernel/User communications module 0.01 ALPHA\n"); + if (register_chrdev(NET_MAJOR,"netlink",&netlink_fops)) { + printk("netlink: unable to get major %d\n", NET_MAJOR); + return -EIO; + } + for(ct=0;ct #include +#include + #include #include @@ -1285,13 +1287,21 @@ void sock_init(void) { int i; - printk("Swansea University Computer Society NET3.031 Snap #1 for Linux 1.3.25\n"); + printk("Swansea University Computer Society NET3.031 Snap #3 for Linux 1.3.30\n"); /* * Initialize all address (protocol) families. */ for (i = 0; i < NPROTO; ++i) pops[i] = NULL; + + /* + * The netlink device handler may be needed early. + */ + +#ifdef CONFIG_NETLINK + init_netlink(); +#endif /* * Initialize the protocols module. -- 2.39.5