From c0ccd8dcbd32639a22b318f56105c977c894d6b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:33:05 -0500 Subject: [PATCH] Import 2.3.99pre2-4 --- CREDITS | 9 + Documentation/Changes | 8 +- Documentation/Configure.help | 231 ++- Documentation/usb/usb-serial.txt | 2 - MAINTAINERS | 2 + Makefile | 1 + arch/alpha/kernel/irq.c | 67 +- arch/alpha/kernel/irq_alpha.c | 17 +- arch/alpha/kernel/setup.c | 25 +- arch/alpha/kernel/smp.c | 12 +- arch/alpha/kernel/sys_dp264.c | 2 - arch/i386/defconfig | 2 +- arch/i386/kernel/acpi.c | 30 +- arch/i386/kernel/head.S | 10 +- arch/i386/kernel/setup.c | 2 +- arch/mips/kernel/irixelf.c | 21 +- arch/sparc/config.in | 61 +- arch/sparc/defconfig | 80 +- arch/sparc64/kernel/binfmt_aout32.c | 28 +- arch/sparc64/kernel/sparc64_ksyms.c | 4 +- arch/sparc64/kernel/sys_sparc.c | 7 +- arch/sparc64/kernel/sys_sparc32.c | 2 +- drivers/ide/Config.in | 23 +- drivers/net/rcpci45.c | 2 +- drivers/net/setup.c | 6 +- drivers/net/wan/Makefile | 8 + drivers/net/wan/comx.c | 12 +- drivers/net/yellowfin.c | 2 +- drivers/parport/ChangeLog | 14 + drivers/parport/parport_pc.c | 64 +- drivers/pnp/quirks.c | 2 +- drivers/scsi/53c7,8xx.c | 27 +- drivers/scsi/imm.c | 2 +- drivers/usb/Config.in | 19 +- drivers/usb/Makefile | 2 +- drivers/usb/hub.c | 104 +- drivers/usb/hub.h | 13 +- drivers/usb/joydev.c | 4 +- drivers/usb/serial/Makefile | 2 +- drivers/usb/serial/usb-serial.c | 37 +- drivers/usb/serial/usb-serial.h | 1 + drivers/usb/uhci.c | 34 +- drivers/usb/usb-ohci.c | 25 +- drivers/video/aty128fb.c | 127 +- fs/binfmt_aout.c | 28 +- fs/binfmt_elf.c | 22 +- fs/exec.c | 6 +- fs/namei.c | 70 + fs/nfsd/nfs3xdr.c | 39 +- fs/nfsd/nfsfh.c | 6 +- fs/nfsd/nfsproc.c | 34 +- fs/nfsd/vfs.c | 270 +-- fs/super.c | 3 +- include/asm-alpha/core_apecs.h | 47 +- include/asm-alpha/core_cia.h | 102 +- include/asm-alpha/core_irongate.h | 63 +- include/asm-alpha/core_lca.h | 47 +- include/asm-alpha/core_mcpcia.h | 47 +- include/asm-alpha/core_polaris.h | 63 +- 
include/asm-alpha/core_t2.h | 37 +- include/asm-alpha/core_tsunami.h | 63 +- include/asm-alpha/delay.h | 17 +- include/asm-alpha/io.h | 34 +- include/asm-alpha/mmu_context.h | 11 +- include/asm-alpha/smp.h | 1 - include/asm-alpha/vga.h | 4 +- include/asm-i386/processor.h | 4 + include/linux/binfmts.h | 2 +- include/linux/fs.h | 1 + include/linux/icmp.h | 21 + include/linux/netfilter.h | 15 +- include/linux/netfilter_ipv4.h | 10 + .../linux/netfilter_ipv4/compat_firewall.h | 47 + include/linux/netfilter_ipv4/ip_conntrack.h | 176 ++ .../linux/netfilter_ipv4/ip_conntrack_core.h | 39 + .../linux/netfilter_ipv4/ip_conntrack_ftp.h | 41 + .../netfilter_ipv4/ip_conntrack_helper.h | 30 + .../netfilter_ipv4/ip_conntrack_protocol.h | 58 + .../linux/netfilter_ipv4/ip_conntrack_tuple.h | 105 + include/linux/netfilter_ipv4/ip_nat.h | 117 ++ include/linux/netfilter_ipv4/ip_nat_core.h | 33 + include/linux/netfilter_ipv4/ip_nat_ftp.h | 21 + include/linux/netfilter_ipv4/ip_nat_helper.h | 30 + .../linux/netfilter_ipv4/ip_nat_protocol.h | 57 + include/linux/netfilter_ipv4/ip_nat_rule.h | 35 + include/linux/netfilter_ipv4/ip_queue.h | 86 + include/linux/netfilter_ipv4/ip_tables.h | 421 ++++ include/linux/netfilter_ipv4/ipchains_core.h | 193 ++ include/linux/netfilter_ipv4/ipfwadm_core.h | 256 +++ include/linux/netfilter_ipv4/ipt_LOG.h | 15 + include/linux/netfilter_ipv4/ipt_MARK.h | 8 + include/linux/netfilter_ipv4/ipt_REJECT.h | 17 + include/linux/netfilter_ipv4/ipt_TOS.h | 12 + include/linux/netfilter_ipv4/ipt_limit.h | 21 + include/linux/netfilter_ipv4/ipt_mac.h | 8 + include/linux/netfilter_ipv4/ipt_mark.h | 9 + include/linux/netfilter_ipv4/ipt_multiport.h | 21 + include/linux/netfilter_ipv4/ipt_owner.h | 18 + include/linux/netfilter_ipv4/ipt_state.h | 12 + include/linux/netfilter_ipv4/ipt_tos.h | 13 + include/linux/netfilter_ipv4/listhelp.h | 114 ++ include/linux/netfilter_ipv4/lockhelp.h | 129 ++ include/linux/nfsd/nfsd.h | 3 + include/linux/shm.h | 4 +- include/linux/skbuff.h | 
2 - include/net/tcp.h | 2 + ipc/shm.c | 102 +- kernel/ksyms.c | 4 + mm/filemap.c | 5 +- mm/page_alloc.c | 108 +- net/Config.in | 6 +- net/Makefile | 6 +- net/core/netfilter.c | 63 +- net/core/skbuff.c | 5 +- net/decnet/dn_route.c | 9 +- net/ipv4/Config.in | 5 +- net/ipv4/icmp.c | 18 +- net/ipv4/ip_gre.c | 9 + net/ipv4/ip_output.c | 5 +- net/ipv4/ipip.c | 11 +- net/ipv4/ipmr.c | 14 +- net/ipv4/netfilter/Config.in | 64 + net/ipv4/netfilter/Makefile | 234 +++ net/ipv4/netfilter/ip_conntrack_core.c | 891 +++++++++ net/ipv4/netfilter/ip_conntrack_ftp.c | 251 +++ .../netfilter/ip_conntrack_proto_generic.c | 60 + net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 111 ++ net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 227 +++ net/ipv4/netfilter/ip_conntrack_proto_udp.c | 65 + net/ipv4/netfilter/ip_conntrack_standalone.c | 298 +++ net/ipv4/netfilter/ip_fw_compat.c | 239 +++ net/ipv4/netfilter/ip_fw_compat_masq.c | 288 +++ net/ipv4/netfilter/ip_fw_compat_redir.c | 283 +++ net/ipv4/netfilter/ip_nat_core.c | 855 ++++++++ net/ipv4/netfilter/ip_nat_ftp.c | 403 ++++ net/ipv4/netfilter/ip_nat_proto_icmp.c | 97 + net/ipv4/netfilter/ip_nat_proto_tcp.c | 143 ++ net/ipv4/netfilter/ip_nat_proto_udp.c | 141 ++ net/ipv4/netfilter/ip_nat_proto_unknown.c | 61 + net/ipv4/netfilter/ip_nat_rule.c | 329 +++ net/ipv4/netfilter/ip_nat_standalone.c | 274 +++ net/ipv4/netfilter/ip_queue.c | 752 +++++++ net/ipv4/netfilter/ip_tables.c | 1664 ++++++++++++++++ net/ipv4/netfilter/ipchains_core.c | 1768 +++++++++++++++++ net/ipv4/netfilter/ipfwadm_core.c | 1410 +++++++++++++ net/ipv4/netfilter/ipt_LOG.c | 369 ++++ net/ipv4/netfilter/ipt_MARK.c | 68 + net/ipv4/netfilter/ipt_MASQUERADE.c | 171 ++ net/ipv4/netfilter/ipt_MIRROR.c | 131 ++ net/ipv4/netfilter/ipt_REDIRECT.c | 104 + net/ipv4/netfilter/ipt_REJECT.c | 145 ++ net/ipv4/netfilter/ipt_TOS.c | 87 + net/ipv4/netfilter/ipt_limit.c | 144 ++ net/ipv4/netfilter/ipt_mac.c | 63 + net/ipv4/netfilter/ipt_mark.c | 52 + net/ipv4/netfilter/ipt_multiport.c | 102 + 
net/ipv4/netfilter/ipt_owner.c | 136 ++ net/ipv4/netfilter/ipt_state.c | 61 + net/ipv4/netfilter/ipt_tos.c | 53 + net/ipv4/netfilter/ipt_unclean.c | 576 ++++++ net/ipv4/netfilter/iptable_filter.c | 182 ++ net/ipv4/netfilter/iptable_mangle.c | 153 ++ net/ipv4/route.c | 26 +- net/ipv4/tcp_ipv4.c | 6 +- net/ipv6/sit.c | 11 +- net/netsyms.c | 2 + net/sched/cls_fw.c | 2 +- net/sched/sch_ingress.c | 4 +- net/unix/af_unix.c | 2 +- 169 files changed, 17247 insertions(+), 1054 deletions(-) create mode 100644 include/linux/netfilter_ipv4/compat_firewall.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_core.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_ftp.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_helper.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_protocol.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_tuple.h create mode 100644 include/linux/netfilter_ipv4/ip_nat.h create mode 100644 include/linux/netfilter_ipv4/ip_nat_core.h create mode 100644 include/linux/netfilter_ipv4/ip_nat_ftp.h create mode 100644 include/linux/netfilter_ipv4/ip_nat_helper.h create mode 100644 include/linux/netfilter_ipv4/ip_nat_protocol.h create mode 100644 include/linux/netfilter_ipv4/ip_nat_rule.h create mode 100644 include/linux/netfilter_ipv4/ip_queue.h create mode 100644 include/linux/netfilter_ipv4/ip_tables.h create mode 100644 include/linux/netfilter_ipv4/ipchains_core.h create mode 100644 include/linux/netfilter_ipv4/ipfwadm_core.h create mode 100644 include/linux/netfilter_ipv4/ipt_LOG.h create mode 100644 include/linux/netfilter_ipv4/ipt_MARK.h create mode 100644 include/linux/netfilter_ipv4/ipt_REJECT.h create mode 100644 include/linux/netfilter_ipv4/ipt_TOS.h create mode 100644 include/linux/netfilter_ipv4/ipt_limit.h create mode 100644 include/linux/netfilter_ipv4/ipt_mac.h create mode 100644 include/linux/netfilter_ipv4/ipt_mark.h create 
mode 100644 include/linux/netfilter_ipv4/ipt_multiport.h create mode 100644 include/linux/netfilter_ipv4/ipt_owner.h create mode 100644 include/linux/netfilter_ipv4/ipt_state.h create mode 100644 include/linux/netfilter_ipv4/ipt_tos.h create mode 100644 include/linux/netfilter_ipv4/listhelp.h create mode 100644 include/linux/netfilter_ipv4/lockhelp.h create mode 100644 net/ipv4/netfilter/Config.in create mode 100644 net/ipv4/netfilter/Makefile create mode 100644 net/ipv4/netfilter/ip_conntrack_core.c create mode 100644 net/ipv4/netfilter/ip_conntrack_ftp.c create mode 100644 net/ipv4/netfilter/ip_conntrack_proto_generic.c create mode 100644 net/ipv4/netfilter/ip_conntrack_proto_icmp.c create mode 100644 net/ipv4/netfilter/ip_conntrack_proto_tcp.c create mode 100644 net/ipv4/netfilter/ip_conntrack_proto_udp.c create mode 100644 net/ipv4/netfilter/ip_conntrack_standalone.c create mode 100644 net/ipv4/netfilter/ip_fw_compat.c create mode 100644 net/ipv4/netfilter/ip_fw_compat_masq.c create mode 100644 net/ipv4/netfilter/ip_fw_compat_redir.c create mode 100644 net/ipv4/netfilter/ip_nat_core.c create mode 100644 net/ipv4/netfilter/ip_nat_ftp.c create mode 100644 net/ipv4/netfilter/ip_nat_proto_icmp.c create mode 100644 net/ipv4/netfilter/ip_nat_proto_tcp.c create mode 100644 net/ipv4/netfilter/ip_nat_proto_udp.c create mode 100644 net/ipv4/netfilter/ip_nat_proto_unknown.c create mode 100644 net/ipv4/netfilter/ip_nat_rule.c create mode 100644 net/ipv4/netfilter/ip_nat_standalone.c create mode 100644 net/ipv4/netfilter/ip_queue.c create mode 100644 net/ipv4/netfilter/ip_tables.c create mode 100644 net/ipv4/netfilter/ipchains_core.c create mode 100644 net/ipv4/netfilter/ipfwadm_core.c create mode 100644 net/ipv4/netfilter/ipt_LOG.c create mode 100644 net/ipv4/netfilter/ipt_MARK.c create mode 100644 net/ipv4/netfilter/ipt_MASQUERADE.c create mode 100644 net/ipv4/netfilter/ipt_MIRROR.c create mode 100644 net/ipv4/netfilter/ipt_REDIRECT.c create mode 100644 
net/ipv4/netfilter/ipt_REJECT.c create mode 100644 net/ipv4/netfilter/ipt_TOS.c create mode 100644 net/ipv4/netfilter/ipt_limit.c create mode 100644 net/ipv4/netfilter/ipt_mac.c create mode 100644 net/ipv4/netfilter/ipt_mark.c create mode 100644 net/ipv4/netfilter/ipt_multiport.c create mode 100644 net/ipv4/netfilter/ipt_owner.c create mode 100644 net/ipv4/netfilter/ipt_state.c create mode 100644 net/ipv4/netfilter/ipt_tos.c create mode 100644 net/ipv4/netfilter/ipt_unclean.c create mode 100644 net/ipv4/netfilter/iptable_filter.c create mode 100644 net/ipv4/netfilter/iptable_mangle.c diff --git a/CREDITS b/CREDITS index f5437e76b51a..e20a1f141d61 100644 --- a/CREDITS +++ b/CREDITS @@ -318,6 +318,15 @@ P: 1024/04880A44 72E5 7031 4414 2EB6 F6B4 4CBD 1181 7032 0488 0A44 D: IEEE 1394 subsystem rewrite and maintainer D: Texas Instruments PCILynx IEEE 1394 driver +N: Marc Boucher +E: marc@mbsi.ca +P: CA 67 A5 1A 38 CE B6 F2 D5 83 51 03 D2 9C 30 9E CE D2 DD 65 +D: Netfilter core +D: IP policy routing by mark +D: Various fixes (mostly networking) +S: Montreal, Quebec +S: Canada + N: Zoltán Böszörményi E: zboszor@mail.externet.hu D: MTRR emulation with Cyrix style ARR registers, Athlon MTRR support diff --git a/Documentation/Changes b/Documentation/Changes index bf650d4174a8..0a06bebdfcd7 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -43,7 +43,7 @@ Current Minimal Requirements encountered a bug! If you're unsure what version you're currently running, the suggested command should tell you. -- Kernel modutils 2.3.7 ; insmod -V +- Kernel modutils 2.3.10 ; insmod -V - Gnu C 2.7.2.3 ; gcc --version - Binutils 2.9.1.0.7 ; ld -v - Linux libc5 C Library 5.4.46 ; ls -l /lib/libc* @@ -174,7 +174,7 @@ Modules ======= You need to upgrade to the latest version of modutils for the Linux -2.3 kernel. This version will also work with your 2.0 kernel. +2.3 kernel. This version can also be built to work with your 2.0 kernel. 
As of 2.1.90-pre1, kerneld has been replaced by a kernel thread, kmod. See Documentation/kmod.txt for more information. The main @@ -586,8 +586,8 @@ ftp://metalab.unc.edu/pub/Linux/GCC/ld.so-1.9.9.tar.gz Modules utilities ================= -The 2.3.7 release: -ftp://ftp.ocs.com.au/pub/modutils/v2.3/modutils-2.3.7.tar.gz +The 2.3.10 release: +ftp://ftp.ocs.com.au/pub/modutils/v2.3/modutils-2.3.10.tar.gz Procps utilities ================ diff --git a/Documentation/Configure.help b/Documentation/Configure.help index a9230e95f1df..421e2db7fcf0 100644 --- a/Documentation/Configure.help +++ b/Documentation/Configure.help @@ -1698,6 +1698,230 @@ CONFIG_NETFILTER Chances are that you should say Y here if you compile a kernel which will run as a router and N for regular hosts. If unsure, say N. +IP: connection tracking (required for masq/NAT) +CONFIG_IP_NF_CONNTRACK + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is required to do Masquerading or other kinds of Network + Address Translation (except for Fast NAT). It can also be used to + enhance packet filtering (see `Connection state match support' + below). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +FTP protocol support +CONFIG_IP_NF_FTP + Tracking FTP connections is problematic: special helpers are + required for tracking them, and doing masquerading and other forms + of Network Address Translation on them. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `Y'. + +IP: userspace queueing via NETLINK (EXPERIMENTAL) +CONFIG_IP_NF_QUEUE + Netfilter has the ability to queue packets to userspace: the netlink + device can be used to access them using this driver. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ +IP: ip tables support (required for filtering/masq/NAT) +CONFIG_IP_NF_IPTABLES + iptables is a general, extensible packet identification framework. + The packet filtering and full NAT (masquerading, port forwarding, + etc) subsystems now use this: say `Y' or `M' here if you want to use + either of those. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +limit match support +CONFIG_IP_NF_MATCH_LIMIT + limit matching allows you to control the rate at which a rule can be + matched: mainly useful in combination with the LOG target. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +MAC address match support +CONFIG_IP_NF_MATCH_MAC + mac matching allows you to match packets based on the source + ethernet address of the packet. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +netfilter mark match support +CONFIG_IP_NF_MATCH_MARK + Netfilter mark matching allows you to match packets based on the + `nfmark' value in the packet. This can be set by the MARK target + (see below). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Multiple port match support +CONFIG_IP_NF_MATCH_MULTIPORT + Multiport matching allows you to match TCP or UDP packets based on + a series of source or destination ports: normally a rule can only + match a single range of ports. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +TOS match support +CONFIG_IP_NF_MATCH_TOS + TOS matching allows you to match packets based on the Type Of + Service fields of the IP packet. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ +Connection state match support +CONFIG_IP_NF_MATCH_STATE + Connection state matching allows you to match packets based on their + relationship to a tracked connection (ie. previous packets). This + is a powerful tool for packet classification. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Unclean match support (EXPERIMENTAL) +CONFIG_IP_NF_MATCH_UNCLEAN + Unclean packet matching matches any strange or invalid packets, by + looking at a series of fields in the IP, TCP, UDP and ICMP headers. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Owner match support (EXPERIMENTAL) +CONFIG_IP_NF_MATCH_OWNER + + Packet owner matching allows you to match locally-generated packets + based on who created them: the user, group, process or session. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Packet filtering +CONFIG_IP_NF_FILTER + Packet filtering defines a table `filter', which has a series of + rules for simple packet filtering at local input, forwarding and + local output. See the man page for iptables(8). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +REJECT target support +CONFIG_IP_NF_TARGET_REJECT + The REJECT target allows a filtering rule to specify that an ICMP + error should be issued in response to an incoming packet, rather + than silently being dropped. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +MIRROR target support (EXPERIMENTAL) +CONFIG_IP_NF_TARGET_MIRROR + The MIRROR target allows a filtering rule to specify that an + incoming packet should be bounced back to the sender. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ +Full NAT +CONFIG_IP_NF_NAT + The Full NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in iptables: see the man page for iptables(8). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +MASQUERADE target support +CONFIG_IP_NF_TARGET_MASQUERADE + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +REDIRECT target support +CONFIG_IP_NF_TARGET_REDIRECT + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for tranparent proxies. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +Packet mangling +CONFIG_IP_NF_MANGLE + This option adds a `mangle' table to iptables: see the man page for + iptables(8). This table is used for various packet alterations + which can effect how the packet is routed. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +TOS target support +CONFIG_IP_NF_TARGET_TOS + This option adds a `TOS' target, which allows you to create rules in + the `mangle' table which alter the Type Of Service field of an IP + packet prior to routing. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ +MARK target support +CONFIG_IP_NF_TARGET_MARK + This option adds a `MARK' target, which allows you to create rules in + the `mangle' table which alter the netfilter mark (nfmark) field + associated with the packet packet prior to routing. This can change + the routing method (see `IP: use netfilter MARK value as routing key') + and can also be used by other subsystems to change their behavior. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +LOG target support +CONFIG_IP_NF_TARGET_LOG + This option adds a `LOG' target, which allows you to create rules in + any iptables table which records the packet header to the syslog. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +ipchains (2.2-style) support +CONFIG_IP_NF_COMPAT_IPCHAINS + This option places ipchains (with masquerading and redirection + support) back into the kernel, using the new netfilter + infrastructure. It is not recommended for new installations (see + `Packet filtering'). With this enabled, you should be able to use + the ipchains tool exactly as in 2.2 kernels. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +ipfwadm (2.0-style) support +CONFIG_IP_NF_COMPAT_IPFWADM + This option places ipfwadm (with masquerading and redirection + support) back into the kernel, using the new netfilter + infrastructure. It is not recommended for new installations (see + `Packet filtering'). With this enabled, you should be able to use + the ipfwadm tool exactly as in 2.0 kernels. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. 
+ SYN flood protection CONFIG_SYN_COOKIES Normal TCP/IP networking is open to an attack known as "SYN @@ -3139,17 +3363,16 @@ CONFIG_IP_ROUTE_MULTIPATH IP: use TOS value as routing key CONFIG_IP_ROUTE_TOS - The header of every IP packet carries a TOS (Type of Service) value + The header of every IP packet carries a TOS (Type Of Service) value with which the packet requests a certain treatment, e.g. low latency (for interactive traffic), high throughput, or high reliability. If you say Y here, you will be able to specify different routes for packets with different TOS values. -IP: use FWMARK value as routing key +IP: use netfilter MARK value as routing key CONFIG_IP_ROUTE_FWMARK If you say Y here, you will be able to specify different routes for - packets with different FWMARK ("firewalling mark") values - (see ipchains(8), "-m" argument). + packets with different mark values (see iptables(8), MARK target). IP: verbose route monitoring CONFIG_IP_ROUTE_VERBOSE diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt index 58f3cf108a43..1ea46bbaab1a 100644 --- a/Documentation/usb/usb-serial.txt +++ b/Documentation/usb/usb-serial.txt @@ -90,8 +90,6 @@ Current status: not all of the standard USB descriptors are handled: Get_Status, Set_Feature O_NONBLOCK, select() - The device usually appears at /dev/ttyUSB1 . 
- Generic Serial driver diff --git a/MAINTAINERS b/MAINTAINERS index 6762ce048d95..f03fbb625c9e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -689,6 +689,8 @@ S: Maintained NETFILTER P: Rusty Russell M: Rusty.Russell@rustcorp.com.au +P: Marc Boucher +M: marc@mbsi.ca W: http://www.samba.org/netfilter/ W: http://netfilter.kernelnotes.org W: http://antarctica.penguincomputing.com/~netfilter/ diff --git a/Makefile b/Makefile index 782263e609eb..3d767c536f1e 100644 --- a/Makefile +++ b/Makefile @@ -320,6 +320,7 @@ modules_install: if [ -f SK98LIN_MODULES ]; then inst_mod SK98LIN_MODULES net; fi; \ if [ -f SKFP_MODULES ]; then inst_mod SKFP_MODULES net; fi; \ if [ -f USB_MODULES ]; then inst_mod USB_MODULES usb; fi; \ + if [ -f USB_SERIAL_MODULES ]; then inst_mod USB_SERIAL_MODULES usb; fi; \ if [ -f IEEE1394_MODULES ]; then inst_mod IEEE1394_MODULES ieee1394; fi; \ if [ -f PCMCIA_MODULES ]; then inst_mod PCMCIA_MODULES pcmcia; fi; \ if [ -f PCMCIA_NET_MODULES ]; then inst_mod PCMCIA_NET_MODULES pcmcia; fi; \ diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index bc8ca101a702..1f454cf48cf6 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -219,15 +219,33 @@ setup_irq(unsigned int irq, struct irqaction * new) } spin_unlock_irqrestore(&desc->lock,flags); - register_irq_proc(irq); return 0; } static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; -static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; +static struct proc_dir_entry * irq_dir[NR_IRQS]; -static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; +#ifdef CONFIG_SMP +static struct proc_dir_entry * smp_affinity_entry[NR_IRQS]; +static char irq_user_affinity[NR_IRQS]; +static unsigned long irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; + +static void +select_smp_affinity(int irq) +{ + static int last_cpu; + int cpu = last_cpu + 1; + + if (! 
irq_desc[irq].handler->set_affinity || irq_user_affinity[irq]) + return; + + while (((cpu_present_mask >> cpu) & 1) == 0) + cpu = (cpu < NR_CPUS ? cpu + 1 : 0); + last_cpu = cpu; + + irq_affinity[irq] = 1UL << cpu; + irq_desc[irq].handler->set_affinity(irq, 1UL << cpu); +} #define HEX_DIGITS 16 @@ -290,18 +308,22 @@ irq_affinity_write_proc(struct file *file, const char *buffer, err = parse_hex_value(buffer, count, &new_value); -#if CONFIG_SMP - /* - * Do not allow disabling IRQs completely - it's a too easy - * way to make the system unusable accidentally :-) At least - * one online CPU still has to be targeted. - */ - if (!(new_value & cpu_present_mask)) + /* The special value 0 means release control of the + affinity to kernel. */ + if (new_value == 0) { + irq_user_affinity[irq] = 0; + select_smp_affinity(irq); + } + /* Do not allow disabling IRQs completely - it's a too easy + way to make the system unusable accidentally :-) At least + one online CPU still has to be targeted. */ + else if (!(new_value & cpu_present_mask)) return -EINVAL; -#endif - - irq_affinity[irq] = new_value; - irq_desc[irq].handler->set_affinity(irq, new_value); + else { + irq_affinity[irq] = new_value; + irq_user_affinity[irq] = 1; + irq_desc[irq].handler->set_affinity(irq, new_value); + } return full_count; } @@ -313,7 +335,7 @@ prof_cpu_mask_read_proc(char *page, char **start, off_t off, unsigned long *mask = (unsigned long *) data; if (count < HEX_DIGITS+1) return -EINVAL; - return sprintf (page, "%08lx\n", *mask); + return sprintf (page, "%016lx\n", *mask); } static int @@ -330,6 +352,7 @@ prof_cpu_mask_write_proc(struct file *file, const char *buffer, *mask = new_value; return full_count; } +#endif /* CONFIG_SMP */ #define MAX_NAMELEN 10 @@ -348,6 +371,7 @@ register_irq_proc (unsigned int irq) /* create /proc/irq/1234 */ irq_dir[irq] = proc_mkdir(name, root_irq_dir); +#ifdef CONFIG_SMP /* create /proc/irq/1234/smp_affinity */ entry = create_proc_entry("smp_affinity", 0700, 
irq_dir[irq]); @@ -357,6 +381,7 @@ register_irq_proc (unsigned int irq) entry->write_proc = irq_affinity_write_proc; smp_affinity_entry[irq] = entry; +#endif } unsigned long prof_cpu_mask = ~0UL; @@ -370,6 +395,7 @@ init_irq_proc (void) /* create /proc/irq */ root_irq_dir = proc_mkdir("irq", 0); +#ifdef CONFIG_SMP /* create /proc/irq/prof_cpu_mask */ entry = create_proc_entry("prof_cpu_mask", 0700, root_irq_dir); @@ -377,6 +403,7 @@ init_irq_proc (void) entry->data = (void *)&prof_cpu_mask; entry->read_proc = prof_cpu_mask_read_proc; entry->write_proc = prof_cpu_mask_write_proc; +#endif /* * Create entries for all existing IRQs. @@ -426,6 +453,10 @@ request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), action->next = NULL; action->dev_id = dev_id; +#ifdef CONFIG_SMP + select_smp_affinity(irq); +#endif + retval = setup_irq(irq, action); if (retval) kfree(action); @@ -522,10 +553,10 @@ get_irq_list(char *buf) *p++ = '\n'; } #if CONFIG_SMP - p += sprintf(p, "LOC: "); + p += sprintf(p, "IPI: "); for (j = 0; j < smp_num_cpus; j++) p += sprintf(p, "%10lu ", - cpu_data[cpu_logical_map(j)].smp_local_irq_count); + cpu_data[cpu_logical_map(j)].ipi_count); p += sprintf(p, "\n"); #endif p += sprintf(p, "ERR: %10lu\n", irq_err_count); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 62ba2362f541..774fcf8a65ea 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -61,14 +62,22 @@ do_entInt(unsigned long type, unsigned long vector, unsigned long la_ptr, break; case 1: #ifdef CONFIG_SMP - cpu_data[smp_processor_id()].smp_local_irq_count++; + { + long cpu; smp_percpu_timer_interrupt(®s); - if (smp_processor_id() == boot_cpuid) -#endif + cpu = smp_processor_id(); + if (cpu != boot_cpuid) { + irq_attempt(cpu, RTC_IRQ)++; + kstat.irqs[cpu][RTC_IRQ]++; + } else { handle_irq(RTC_IRQ, ®s); + } + } +#else + handle_irq(RTC_IRQ, ®s); 
+#endif return; case 2: - irq_err_count++; alpha_mv.machine_check(vector, la_ptr, ®s); return; case 3: diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 61591453546c..49ddca2e497e 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -846,6 +846,22 @@ platform_string(void) } } +static int +get_nr_processors(struct percpu_struct *cpubase, unsigned long num) +{ + struct percpu_struct *cpu; + int i, count = 0; + + for (i = 0; i < num; i++) { + cpu = (struct percpu_struct *) + ((char *)cpubase + i*hwrpb->processor_size); + if ((cpu->flags & 0x1cc) == 0x1cc) + count++; + } + return count; +} + + /* * BUFFER is PAGE_SIZE bytes long. */ @@ -865,7 +881,7 @@ int get_cpuinfo(char *buffer) char *cpu_name; char *systype_name; char *sysvariation_name; - int len; + int len, nr_processors; cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset); cpu_index = (unsigned) (cpu->type - 1); @@ -876,6 +892,8 @@ int get_cpuinfo(char *buffer) get_sysnames(hwrpb->sys_type, hwrpb->sys_variation, &systype_name, &sysvariation_name); + nr_processors = get_nr_processors(cpu, hwrpb->nr_processors); + len = sprintf(buffer, "cpu\t\t\t: Alpha\n" "cpu model\t\t: %s\n" @@ -894,7 +912,8 @@ int get_cpuinfo(char *buffer) "BogoMIPS\t\t: %lu.%02lu\n" "kernel unaligned acc\t: %ld (pc=%lx,va=%lx)\n" "user unaligned acc\t: %ld (pc=%lx,va=%lx)\n" - "platform string\t\t: %s\n", + "platform string\t\t: %s\n" + "cpus detected\t\t: %d\n", cpu_name, cpu->variation, cpu->revision, (char*)cpu->serial_no, systype_name, sysvariation_name, hwrpb->sys_revision, @@ -909,7 +928,7 @@ int get_cpuinfo(char *buffer) loops_per_sec / 500000, (loops_per_sec / 5000) % 100, unaligned[0].count, unaligned[0].pc, unaligned[0].va, unaligned[1].count, unaligned[1].pc, unaligned[1].va, - platform_string()); + platform_string(), nr_processors); #ifdef __SMP__ len += smp_info(buffer+len); diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index ab1fb9ab2c4a..1ddaaf74d35a 
100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -1003,15 +1003,11 @@ flush_icache_page(struct vm_area_struct *vma, struct page *page) int smp_info(char *buffer) { - long i; - unsigned long sum = 0; - for (i = 0; i < NR_CPUS; i++) - sum += cpu_data[i].ipi_count; - - return sprintf(buffer, "CPUs probed %d active %d map 0x%lx IPIs %ld\n", - smp_num_probed, smp_num_cpus, cpu_present_mask, sum); + return sprintf(buffer, + "cpus active\t\t: %d\n" + "cpu active mask\t\t: %016lx\n", + smp_num_cpus, cpu_present_mask); } - #if DEBUG_SPINLOCK void diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 4735a08f6ead..c10018fffdd6 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -303,8 +303,6 @@ init_tsunami_irqs(struct hw_interrupt_type * ops, int imin, int imax) static void __init dp264_init_irq(void) { - int cpu; - outb(0, DMA1_RESET_REG); outb(0, DMA2_RESET_REG); outb(DMA_MODE_CASCADE, DMA2_MODE_REG); diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 0f3f5cb8ab49..33f2d3cb1ab3 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -179,7 +179,6 @@ CONFIG_IDEPCI_SHARE_IRQ=y # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_IDEDMA_PCI_AUTO is not set # CONFIG_BLK_DEV_IDEDMA is not set -# CONFIG_IDEDMA_AUTO is not set # CONFIG_IDEDMA_PCI_EXPERIMENTAL is not set # CONFIG_IDEDMA_PCI_WIP is not set # CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set @@ -208,6 +207,7 @@ CONFIG_IDEPCI_SHARE_IRQ=y # CONFIG_BLK_DEV_TRM290 is not set # CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_IDE_CHIPSETS is not set +# CONFIG_IDEDMA_AUTO is not set CONFIG_BLK_DEV_IDE_MODES=y # diff --git a/arch/i386/kernel/acpi.c b/arch/i386/kernel/acpi.c index 8efa2832a730..f9410f3fd7c0 100644 --- a/arch/i386/kernel/acpi.c +++ b/arch/i386/kernel/acpi.c @@ -488,13 +488,13 @@ static int __init acpi_find_tables(void) if (!rsdt) { printk(KERN_ERR "ACPI: missing RSDT at 0x%p\n", (void*) rsdp->rsdt); - return -ENODEV; + return 
-EINVAL; } else if (rsdt->signature != ACPI_RSDT_SIG) { printk(KERN_ERR "ACPI: bad RSDT at 0x%p (%08x)\n", (void*) rsdp->rsdt, (unsigned) rsdt->signature); acpi_unmap_table(rsdt); - return -ENODEV; + return -EINVAL; } // search RSDT for FACP acpi_facp.table = NULL; @@ -532,7 +532,7 @@ static int __init acpi_find_tables(void) if (!acpi_facp.table) { printk(KERN_ERR "ACPI: missing FACP\n"); - return -ENODEV; + return -EINVAL; } return 0; } @@ -1461,8 +1461,19 @@ static int __init acpi_init(void) switch (acpi_enabled) { case ACPI_ENABLED: - if (acpi_find_tables() && acpi_find_chipset()) + switch (acpi_find_tables()) { + case 0: + // found valid ACPI tables + break; + case -ENODEV: + // found no ACPI tables, try chipset-specific + if (acpi_find_chipset()) + return -ENODEV; + break; + default: + // found broken ACPI tables return -ENODEV; + } break; case ACPI_TABLES_ONLY: if (acpi_find_tables()) @@ -1478,6 +1489,12 @@ static int __init acpi_init(void) facp = (struct acpi_facp*) acpi_facp.table; + if (PM_IS_ACTIVE()) { + printk(KERN_NOTICE "acpi: APM is already active.\n"); + goto err_out; + } + pm_active = 1; + /* * Internally we always keep latencies in timer * ticks, which is simpler and more consistent (what is @@ -1516,8 +1533,6 @@ static int __init acpi_init(void) pm_power_off = acpi_power_off; - pm_active = 1; - /* * Set up the ACPI idle function. 
Note that we can't really * do this with multiple CPU's, we'd need a per-CPU ACPI @@ -1549,7 +1564,6 @@ static void __exit acpi_exit(void) struct acpi_facp *facp = (struct acpi_facp*) acpi_facp.table; pm_idle = NULL; - pm_active = 0; pm_power_off = NULL; unregister_sysctl_table(acpi_sysctl); @@ -1563,6 +1577,8 @@ static void __exit acpi_exit(void) if (pci_driver_registered) pci_unregister_driver(&acpi_driver); + + pm_active = 0; } /* diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 3340946be8a7..7a8a17b636a2 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -58,10 +58,19 @@ startup_32: * New page tables may be in 4Mbyte page mode and may * be using the global pages. * + * NOTE! If we are on a 486 we may have no cr4 at all! + * So we do not try to touch it unless we really have + * some bits in it to set. This won't work if the BSP + * implements cr4 but this AP does not -- very unlikely + * but be warned! The same applies to the pse feature + * if not equally supported. --macro + * * NOTE! We have to correct for the fact that we're * not yet offset PAGE_OFFSET.. 
*/ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET + cmpl $0,cr4_bits + je 1f movl %cr4,%eax # Turn on 4Mb pages orl cr4_bits,%eax movl %eax,%cr4 @@ -219,7 +228,6 @@ is386: pushl %ecx # restore original EFLAGS orl $2,%eax # set MP 2: movl %eax,%cr0 call check_x87 -4: #ifdef __SMP__ incb ready #endif diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index a12b6b73d75e..fb5f48ea8ca4 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1546,7 +1546,7 @@ void __init cpu_init (void) cpus_initialized++; printk("Initializing CPU#%d\n", nr); - if (cpu_has_pse) + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c index df5f2a654990..62f517460f6a 100644 --- a/arch/mips/kernel/irixelf.c +++ b/arch/mips/kernel/irixelf.c @@ -42,7 +42,7 @@ #undef DEBUG_ELF static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs); -static int load_irix_library(int fd); +static int load_irix_library(struct file *); static int irix_core_dump(long signr, struct pt_regs * regs, struct file *file); extern int dump_fpu (elf_fpregset_t *); @@ -820,7 +820,7 @@ out_free_ph: /* This is really simpleminded and specialized - we are loading an * a.out library that is given an ELF header. */ -static inline int do_load_irix_library(struct file *file) +static int load_irix_library(struct file *file) { struct elfhdr elf_ex; struct elf_phdr *elf_phdata = NULL; @@ -834,8 +834,6 @@ static inline int do_load_irix_library(struct file *file) int i,j, k; len = 0; - if (!file->f_op) - return -EACCES; dentry = file->f_dentry; inode = dentry->d_inode; elf_bss = 0; @@ -888,12 +886,14 @@ static inline int do_load_irix_library(struct file *file) while(elf_phdata->p_type != PT_LOAD) elf_phdata++; /* Now use mmap to map the library into memory. 
*/ + down(¤t->mm->mmap_sem); error = do_mmap(file, elf_phdata->p_vaddr & 0xfffff000, elf_phdata->p_filesz + (elf_phdata->p_vaddr & 0xfff), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, elf_phdata->p_offset & 0xfffff000); + up(¤t->mm->mmap_sem); k = elf_phdata->p_vaddr + elf_phdata->p_filesz; if(k > elf_bss) elf_bss = k; @@ -912,19 +912,6 @@ static inline int do_load_irix_library(struct file *file) kfree(elf_phdata); return 0; } - -static int load_irix_library(int fd) -{ - int retval = -EACCES; - struct file *file; - - file = fget(fd); - if (file) { - retval = do_load_irix_library(file); - fput(file); - } - return retval; -} /* Called through irix_syssgi() to map an elf image given an FD, * a phdr ptr USER_PHDRP in userspace, and a count CNT telling how many diff --git a/arch/sparc/config.in b/arch/sparc/config.in index 21f47fd59091..e67a4302216f 100644 --- a/arch/sparc/config.in +++ b/arch/sparc/config.in @@ -1,4 +1,4 @@ -# $Id: config.in,v 1.89 2000/03/14 07:31:19 jj Exp $ +# $Id: config.in,v 1.90 2000/03/17 05:18:02 anton Exp $ # For a description of the syntax of this configuration file, # see the Configure script. # @@ -19,18 +19,6 @@ define_bool CONFIG_VT_CONSOLE y bool 'Symmetric multi-processing support (does not work on sun4/sun4c)' CONFIG_SMP -bool 'Support for SUN4 machines (disables SUN4[CDM] support)' CONFIG_SUN4 -if [ "$CONFIG_SUN4" != "y" ]; then - bool 'Support for PCI and PS/2 keyboard/mouse' CONFIG_PCI - source drivers/pci/Config.in -fi - -mainmenu_option next_comment -comment 'Console drivers' -bool 'PROM console' CONFIG_PROM_CONSOLE -source drivers/video/Config.in -endmenu - # Global things across all Sun machines. 
define_bool CONFIG_SBUS y define_bool CONFIG_SBUSCHAR y @@ -43,9 +31,11 @@ define_bool CONFIG_SUN_KEYBOARD y define_bool CONFIG_SUN_CONSOLE y define_bool CONFIG_SUN_AUXIO y define_bool CONFIG_SUN_IO y + +bool 'Support for SUN4 machines (disables SUN4[CDM] support)' CONFIG_SUN4 if [ "$CONFIG_SUN4" != "y" ]; then - source drivers/sbus/char/Config.in - source drivers/sbus/audio/Config.in + bool 'Support for PCI and PS/2 keyboard/mouse' CONFIG_PCI + source drivers/pci/Config.in fi tristate 'Openprom tree appears in /proc/openprom' CONFIG_SUN_OPENPROMFS @@ -73,16 +63,28 @@ if [ "$CONFIG_MODULES" = "y" ]; then fi endmenu +mainmenu_option next_comment +comment 'Console drivers' +bool 'PROM console' CONFIG_PROM_CONSOLE +source drivers/video/Config.in +endmenu + +if [ "$CONFIG_SUN4" != "y" ]; then + source drivers/sbus/char/Config.in + source drivers/sbus/audio/Config.in +fi + mainmenu_option next_comment comment 'Block devices' bool 'Normal floppy disk support' CONFIG_BLK_DEV_FD + bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then tristate ' Linear (append) mode' CONFIG_MD_LINEAR tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED - tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING - tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 +# tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING +# tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 fi tristate 'RAM disk support' CONFIG_BLK_DEV_RAM @@ -99,16 +101,6 @@ if [ "$CONFIG_NET" = "y" ]; then source net/Config.in fi -mainmenu_option next_comment -comment 'ISDN subsystem' - -tristate 'ISDN support' CONFIG_ISDN -if [ "$CONFIG_ISDN" != "n" ]; then - source drivers/isdn/Config.in -fi -endmenu - - define_bool CONFIG_IDE n define_bool CONFIG_BLK_DEV_IDE_MODES n define_bool CONFIG_BLK_DEV_HD n @@ -126,6 +118,15 @@ define_bool CONFIG_BLK_DEV_HD n # fi # endmenu +mainmenu_option next_comment +comment 'ISDN subsystem' + +tristate 'ISDN support' CONFIG_ISDN +if [ "$CONFIG_ISDN" != "n" 
]; then + source drivers/isdn/Config.in +fi +endmenu + mainmenu_option next_comment comment 'SCSI support' @@ -196,12 +197,16 @@ if [ "$CONFIG_NET" = "y" ]; then fi tristate ' Sun LANCE support' CONFIG_SUNLANCE tristate ' Sun Happy Meal 10/100baseT support' CONFIG_HAPPYMEAL - tristate ' Sun BigMAC 10/100baseT support (EXPERIMENTAL)' CONFIG_SUNBMAC + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + tristate ' Sun BigMAC 10/100baseT support (EXPERIMENTAL)' CONFIG_SUNBMAC + fi tristate ' Sun QuadEthernet support' CONFIG_SUNQE tristate ' MyriCOM Gigabit Ethernet support' CONFIG_MYRI_SBUS + # bool ' FDDI driver support' CONFIG_FDDI # if [ "$CONFIG_FDDI" = "y" ]; then # fi + if [ "$CONFIG_ATM" = "y" ]; then source drivers/atm/Config.in fi diff --git a/arch/sparc/defconfig b/arch/sparc/defconfig index de483af6eb49..b39da1989041 100644 --- a/arch/sparc/defconfig +++ b/arch/sparc/defconfig @@ -14,8 +14,42 @@ CONFIG_EXPERIMENTAL=y CONFIG_VT=y CONFIG_VT_CONSOLE=y # CONFIG_SMP is not set +CONFIG_SBUS=y +CONFIG_SBUSCHAR=y +CONFIG_BUSMOUSE=y +CONFIG_SUN_MOUSE=y +CONFIG_SERIAL=y +CONFIG_SUN_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +CONFIG_SUN_KEYBOARD=y +CONFIG_SUN_CONSOLE=y +CONFIG_SUN_AUXIO=y +CONFIG_SUN_IO=y # CONFIG_SUN4 is not set # CONFIG_PCI is not set +CONFIG_SUN_OPENPROMFS=m +CONFIG_NET=y +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_SUNOS_EMUL=y + +# +# Parallel port support +# +# CONFIG_PARPORT is not set +# CONFIG_PRINTER is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y # # Console drivers @@ -42,17 +76,6 @@ CONFIG_FBCON_CFB8=y CONFIG_FBCON_FONTWIDTH8_ONLY=y CONFIG_FONT_SUN8x16=y # CONFIG_FBCON_FONTS is not set -CONFIG_SBUS=y -CONFIG_SBUSCHAR=y -CONFIG_BUSMOUSE=y -CONFIG_SUN_MOUSE=y -CONFIG_SERIAL=y -CONFIG_SUN_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SUN_KEYBOARD=y -CONFIG_SUN_CONSOLE=y -CONFIG_SUN_AUXIO=y 
-CONFIG_SUN_IO=y # # Misc Linux/SPARC drivers @@ -73,39 +96,14 @@ CONFIG_SUN_AURORA=m # CONFIG_SPARCAUDIO_CS4231 is not set # CONFIG_SPARCAUDIO_DBRI is not set # CONFIG_SPARCAUDIO_DUMMY is not set -CONFIG_SUN_OPENPROMFS=m -CONFIG_NET=y -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_SUNOS_EMUL=y # -# Parallel port support -# -# CONFIG_PARPORT is not set -# CONFIG_PRINTER is not set - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Floppy, IDE, and other block devices +# Block devices # CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m CONFIG_MD_STRIPED=m -CONFIG_MD_MIRRORING=m -CONFIG_MD_RAID5=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_INITRD=y CONFIG_BLK_DEV_LOOP=m @@ -162,17 +160,15 @@ CONFIG_DECNET_SIOCGIFCONF=y # QoS and/or fair queueing # # CONFIG_NET_SCHED is not set +# CONFIG_IDE is not set +# CONFIG_BLK_DEV_IDE_MODES is not set +# CONFIG_BLK_DEV_HD is not set # # ISDN subsystem # # CONFIG_ISDN is not set -# -# ATA/IDE/MFM/RLL support -# -# CONFIG_IDE is not set - # # SCSI support # diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index 3e95ed9cfafe..3504533d167d 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -33,7 +33,7 @@ #include static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_aout32_library(int fd); +static int load_aout32_library(struct file*); static int aout32_core_dump(long signr, struct pt_regs * regs, struct file *file); extern void dump_thread(struct pt_regs *, struct user *); @@ -343,9 +343,8 @@ beyond_if: } /* N.B. Move to .h file and use code in fs/binfmt_aout.c? 
*/ -static int load_aout32_library(int fd) +static int load_aout32_library(struct file *file) { - struct file * file; struct inode * inode; unsigned long bss, start_addr, len; unsigned long error; @@ -353,12 +352,6 @@ static int load_aout32_library(int fd) loff_t offset = 0; struct exec ex; - retval = -EACCES; - file = fget(fd); - if (!file) - goto out; - if (!file->f_op) - goto out_putf; inode = file->f_dentry->d_inode; retval = -ENOEXEC; @@ -367,23 +360,23 @@ static int load_aout32_library(int fd) error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset); set_fs(USER_DS); if (error != sizeof(ex)) - goto out_putf; + goto out; /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { - goto out_putf; + goto out; } if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) { printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); - goto out_putf; + goto out; } if (N_FLAGS(ex)) - goto out_putf; + goto out; /* For QMAGIC, the starting address is 0x20 into the page. We mask this off to get the starting address for the page */ @@ -391,13 +384,15 @@ static int load_aout32_library(int fd) start_addr = ex.a_entry & 0xfffff000; /* Now use mmap to map the library into memory. 
*/ + down(¤t->mm->mmap_sem); error = do_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, N_TXTOFF(ex)); + up(¤t->mm->mmap_sem); retval = error; if (error != start_addr) - goto out_putf; + goto out; len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; @@ -405,12 +400,9 @@ static int load_aout32_library(int fd) error = do_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) - goto out_putf; + goto out; } retval = 0; - -out_putf: - fput(file); out: return retval; } diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 4f1271ba9cd4..26e11085d94e 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -1,4 +1,4 @@ -/* $Id: sparc64_ksyms.c,v 1.78 2000/03/15 15:02:30 jj Exp $ +/* $Id: sparc64_ksyms.c,v 1.79 2000/03/17 14:41:18 davem Exp $ * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -142,6 +142,8 @@ EXPORT_SYMBOL(cpu_data); /* Misc SMP information */ EXPORT_SYMBOL(smp_num_cpus); +EXPORT_SYMBOL(__cpu_number_map); +EXPORT_SYMBOL(__cpu_logical_map); /* Spinlock debugging library, optional. 
*/ #ifdef SPIN_LOCK_DEBUG diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 81b4c4de1166..82aedbb08102 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -1,4 +1,4 @@ -/* $Id: sys_sparc.c,v 1.36 2000/02/16 07:31:35 davem Exp $ +/* $Id: sys_sparc.c,v 1.37 2000/03/17 05:48:46 anton Exp $ * linux/arch/sparc64/kernel/sys_sparc.c * * This file contains various random system calls that @@ -348,9 +348,10 @@ asmlinkage int solaris_syscall(struct pt_regs *regs) lock_kernel(); regs->tpc = regs->tnpc; regs->tnpc += 4; - if(++count <= 20) + if(++count <= 5) { printk ("For Solaris binary emulation you need solaris module loaded\n"); - show_regs (regs); + show_regs (regs); + } send_sig(SIGSEGV, current, 1); unlock_kernel(); return -ENOSYS; diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index c74893d39089..763acdc0f1ab 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1,4 +1,4 @@ -/* $Id: sys_sparc32.c,v 1.138 2000/03/15 06:01:23 davem Exp $ +/* $Id: sys_sparc32.c,v 1.139 2000/03/16 20:37:57 davem Exp $ * sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. 
* * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) diff --git a/drivers/ide/Config.in b/drivers/ide/Config.in index bcd17994bdb6..2d4d2228cc76 100644 --- a/drivers/ide/Config.in +++ b/drivers/ide/Config.in @@ -32,7 +32,6 @@ if [ "$CONFIG_BLK_DEV_IDE" != "n" ]; then bool ' Boot off-board chipsets first support' CONFIG_BLK_DEV_OFFBOARD dep_bool ' Use PCI DMA by default when available' CONFIG_IDEDMA_PCI_AUTO $CONFIG_BLK_DEV_IDEDMA_PCI define_bool CONFIG_BLK_DEV_IDEDMA $CONFIG_BLK_DEV_IDEDMA_PCI - define_bool CONFIG_IDEDMA_AUTO $CONFIG_IDEDMA_PCI_AUTO define_bool CONFIG_IDEDMA_PCI_EXPERIMENTAL $CONFIG_EXPERIMENTAL dep_bool ' ATA Work(s) In Progress (EXPERIMENTAL)' CONFIG_IDEDMA_PCI_WIP $CONFIG_BLK_DEV_IDEDMA_PCI $CONFIG_EXPERIMENTAL dep_bool ' Good-Bad DMA Model-Firmware (WIP)' CONFIG_IDEDMA_NEW_DRIVE_LISTINGS $CONFIG_IDEDMA_PCI_WIP @@ -72,14 +71,12 @@ if [ "$CONFIG_BLK_DEV_IDE" != "n" ]; then dep_bool ' PowerMac IDE DMA support' CONFIG_BLK_DEV_IDEDMA_PMAC $CONFIG_BLK_DEV_IDE_PMAC dep_bool ' Use DMA by default' CONFIG_IDEDMA_PMAC_AUTO $CONFIG_BLK_DEV_IDEDMA_PMAC define_bool CONFIG_BLK_DEV_IDEDMA $CONFIG_BLK_DEV_IDEDMA_PMAC - define_bool CONFIG_IDEDMA_AUTO $CONFIG_IDEDMA_PMAC_AUTO fi if [ "$CONFIG_ARCH_ACORN" = "y" ]; then dep_bool ' ICS IDE interface support' CONFIG_BLK_DEV_IDE_ICSIDE $CONFIG_ARCH_ACORN dep_bool ' ICS DMA support' CONFIG_BLK_DEV_IDEDMA_ICS $CONFIG_BLK_DEV_IDE_ICSIDE dep_bool ' Use ICS DMA by default' CONFIG_IDEDMA_ICS_AUTO $CONFIG_BLK_DEV_IDEDMA_ICS define_bool CONFIG_BLK_DEV_IDEDMA $CONFIG_BLK_DEV_IDEDMA_ICS - define_bool CONFIG_IDEDMA_AUTO $CONFIG_IDEDMA_ICS_AUTO dep_bool ' RapIDE interface support' CONFIG_BLK_DEV_IDE_RAPIDE $CONFIG_ARCH_ACORN fi if [ "$CONFIG_AMIGA" = "y" ]; then @@ -115,19 +112,13 @@ else define_bool CONFIG_BLK_DEV_HD $CONFIG_BLK_DEV_HD_ONLY fi -# if [ "$CONFIG_BLK_DEV_IDEDMA_PCI" = "y" -o \ -# "$CONFIG_BLK_DEV_IDEDMA_PMAC" = "y" -o \ -# "$CONFIG_BLK_DEV_IDEDMA_ICS" = "y" ]; then -# define_bool CONFIG_BLK_DEV_IDEDMA 
y -# if [ "$CONFIG_IDEDMA_PCI_AUTO" = "y" -o \ -# "$CONFIG_IDEDMA_PMAC_AUTO" = "y" -o \ -# "$CONFIG_IDEDMA_ICS_AUTO" = "y" ]; then -# define_bool CONFIG_IDEDMA_AUTO y -# fi -# else -# define_bool CONFIG_BLK_DEV_IDEDMA n -# define_bool CONFIG_IDEDMA_AUTO n -# fi +if [ "$CONFIG_IDEDMA_PCI_AUTO" = "y" -o \ + "$CONFIG_IDEDMA_PMAC_AUTO" = "y" -o \ + "$CONFIG_IDEDMA_ICS_AUTO" = "y" ]; then + define_bool CONFIG_IDEDMA_AUTO y +else + define_bool CONFIG_IDEDMA_AUTO n +fi if [ "$CONFIG_IDE_CHIPSETS" = "y" -o \ "$CONFIG_BLK_DEV_AEC6210" = "y" -o \ diff --git a/drivers/net/rcpci45.c b/drivers/net/rcpci45.c index e19f1e0fda35..36d734a73787 100644 --- a/drivers/net/rcpci45.c +++ b/drivers/net/rcpci45.c @@ -1221,7 +1221,7 @@ static void __exit rcpci_cleanup_module (void) } module_init(rcpci_init_module); -module_exit(rcpci_clenaup_module); +module_exit(rcpci_cleanup_module); static int diff --git a/drivers/net/setup.c b/drivers/net/setup.c index f8f59b9fbce2..12e51e458051 100644 --- a/drivers/net/setup.c +++ b/drivers/net/setup.c @@ -9,7 +9,6 @@ #include extern int mkiss_init_ctrl_dev(void); -extern int ppp_init(void); extern int slip_init_ctrl_dev(void); extern int strip_init_ctrl_dev(void); extern int x25_asy_init_ctrl_dev(void); @@ -77,7 +76,7 @@ struct net_probe pci_probes[] __initdata = { {cpm_enet_init, 0}, #endif #if defined(CONFIG_COMX) - {comx_init(), 0}, + {comx_init, 0}, #endif /* * SLHC if present needs attaching so other people see it * even if not opened. 
@@ -167,9 +166,6 @@ static void __init network_ldisc_init(void) #if defined(CONFIG_STRIP) strip_init_ctrl_dev(); #endif -#if defined(CONFIG_PPP) - ppp_init(); -#endif } diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index b2cd8aafb876..dbb12c2bfda4 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -49,6 +49,14 @@ else endif endif +ifeq ($(CONFIG_COMX),y) +LX_OBJS += comx.o +else + ifeq ($(CONFIG_COMX),m) + MX_OBJS += comx.o + endif +endif + ifeq ($(CONFIG_COMX_HW_COMX),y) L_OBJS += comx-hw-comx.o else diff --git a/drivers/net/wan/comx.c b/drivers/net/wan/comx.c index d3ca69e869d8..1c59075bedd9 100644 --- a/drivers/net/wan/comx.c +++ b/drivers/net/wan/comx.c @@ -432,7 +432,7 @@ static int comx_statistics(struct net_device *dev, char *page) ch->line_status & PROTO_UP ? "UP" : "DOWN"); len += sprintf(page + len, "Modem status changes: %lu, Transmitter status " "is %s, tbusy: %d\n", ch->current_stats->tx_carrier_errors, ch->HW_txe ? - ch->HW_txe(dev) ? "IDLE" : "BUSY" : "NOT READY", (int)dev->tbusy); + ch->HW_txe(dev) ? 
"IDLE" : "BUSY" : "NOT READY", netif_running(dev)); len += sprintf(page + len, "Interface load (input): %d / %d / %d bits/s (", LOADAVG(0,0), LOADAVG(1, 0), LOADAVG(2, 0)); tmpstr[0] = 0; @@ -860,7 +860,7 @@ static int comx_mkdir(struct inode *dir, struct dentry *dentry, int mode) return -EIO; } - new_dir->ops = &proc_dir_inode_operations; // ez egy normalis /proc konyvtar + new_dir->proc_iops = &proc_dir_inode_operations; // ez egy normalis /proc konyvtar new_dir->nlink = 2; new_dir->data = NULL; // ide jon majd a struct dev @@ -884,7 +884,7 @@ static int comx_mkdir(struct inode *dir, struct dentry *dentry, int mode) S_IFREG | 0644, new_dir)) == NULL) { return -ENOMEM; } - debug_file->ops = &comx_debug_inode_ops; + debug_file->proc_iops = &comx_debug_inode_ops; debug_file->data = (void *)debug_file; debug_file->read_proc = NULL; // see below debug_file->write_proc = &comx_write_proc; @@ -1027,7 +1027,7 @@ static struct proc_dir_entry *create_comx_proc_entry(char *name, int mode, struct proc_dir_entry *new_file; if ((new_file = create_proc_entry(name, S_IFREG | mode, dir)) != NULL) { - new_file->ops = &comx_normal_inode_ops; + new_file->proc_iops = &comx_normal_inode_ops; new_file->data = (void *)new_file; new_file->read_proc = &comx_read_proc; new_file->write_proc = &comx_write_proc; @@ -1129,7 +1129,7 @@ int comx_unregister_protocol(char *name) #define comx_init init_module #endif -__initfunc(int comx_init(void)) +int __init comx_init(void) { struct proc_dir_entry *new_file; @@ -1177,7 +1177,7 @@ __initfunc(int comx_init(void)) return -ENOMEM; } - new_file->ops = &comx_normal_inode_ops; + new_file->proc_iops = &comx_normal_inode_ops; new_file->data = new_file; new_file->read_proc = &comx_root_read_proc; new_file->write_proc = NULL; diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index f28feea438c0..a86c83b7d8d5 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -1411,7 +1411,7 @@ static void __exit yellowfin_cleanup (void) 
module_init(yellowfin_init); -module_exit(yellowfin_exit); +module_exit(yellowfin_cleanup); /* diff --git a/drivers/parport/ChangeLog b/drivers/parport/ChangeLog index 8c89ab7dc7a1..d2bb93c74ae7 100644 --- a/drivers/parport/ChangeLog +++ b/drivers/parport/ChangeLog @@ -1,3 +1,17 @@ +2000-03-16 Tim Waugh + + * parport_pc.c (parport_ECP_supported): This seems to trigger on + machines that don't have an IRQ conflict; toned down the warning + message accordingly. + +2000-03-16 Gunther Mayer + + * parport_pc.c (show_parconfig_smsc37c669): Fix typo. + (decode_winbond): More IDs. + (winbond_check): Protect against false positives. + (winbond_check2): Likewise. + (smsc_check): Likewise. + 2000-03-15 Tim Waugh * parport_pc.c (cleanup_module): Don't call pci_unregister_driver diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 81c464b8014c..dbbd12864e35 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1060,7 +1060,7 @@ static void __devinit show_parconfig_smsc37c669(int io, int key) (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no"); printk("SMSC LPT Config: Port mode=%s, EPP version =%s\n", (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], - (cr4 & 40) ? "1.7" : "1.9"); + (cr4 & 0x40) ? "1.7" : "1.9"); /* Heuristics ! BIOS setup for this mainboard device limits the choices to standard settings, i.e. 
io-address and IRQ @@ -1172,12 +1172,15 @@ static void __devinit decode_winbond(int efer, int key, int devid, int devrev, i /* Values are from public data sheets pdf files, I can just confirm 83977TF is correct :-) */ - if (id == 0x9773) type="83977TF"; + if (id == 0x9771) type="83977F/AF"; + else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x"; else if (id == 0x9774) type="83977ATF"; else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x"; - else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97x35x"; + else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x"; else if ((id & ~0x0f) == 0x5210) type="83627"; else if ((id & ~0x0f) == 0x6010) type="83697HF"; + else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;} + else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;} else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;} else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;} else progif=0; @@ -1225,7 +1228,15 @@ static void __devinit decode_smsc(int efer, int key, int devid, int devrev) static void __devinit winbond_check(int io, int key) { - int devid,devrev,oldid; + int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + + /* First probe without key */ + outb(0x20,io); + x_devid=inb(io+1); + outb(0x21,io); + x_devrev=inb(io+1); + outb(0x09,io); + x_oldid=inb(io+1); outb(key,io); outb(key,io); /* Write Magic Sequence to EFER, extended @@ -1238,12 +1249,23 @@ static void __devinit winbond_check(int io, int key) oldid=inb(io+1); outb(0xaa,io); /* Magic Seal */ + if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) + return; /* protection against false positives */ + decode_winbond(io,key,devid,devrev,oldid); } static void __devinit winbond_check2(int io,int key) { - int devid,devrev,oldid; + int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + + /* First probe without the key */ + outb(0x20,io+2); + x_devid=inb(io+2); + outb(0x21,io+1); + x_devrev=inb(io+2); + outb(0x09,io+1); + x_oldid=inb(io+2); 
outb(key,io); /* Write Magic Byte to EFER, extended funtion enable register */ @@ -1255,23 +1277,44 @@ static void __devinit winbond_check2(int io,int key) oldid=inb(io+2); outb(0xaa,io); /* Magic Seal */ + if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) + return; /* protection against false positives */ + decode_winbond(io,key,devid,devrev,oldid); } static void __devinit smsc_check(int io, int key) { - int devid,devrev; + int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev; + + /* First probe without the key */ + outb(0x0d,io); + x_oldid=inb(io+1); + outb(0x0e,io); + x_oldrev=inb(io+1); + outb(0x20,io); + x_id=inb(io+1); + outb(0x21,io); + x_rev=inb(io+1); outb(key,io); outb(key,io); /* Write Magic Sequence to EFER, extended funtion enable register */ outb(0x0d,io); /* Write EFIR, extended function index register */ - devid=inb(io+1); /* Read EFDR, extended function data register */ + oldid=inb(io+1); /* Read EFDR, extended function data register */ outb(0x0e,io); - devrev=inb(io+1); + oldrev=inb(io+1); + outb(0x20,io); + id=inb(io+1); + outb(0x21,io); + rev=inb(io+1); outb(0xaa,io); /* Magic Seal */ - decode_smsc(io,key,devid,devrev); + if ((x_id == id) && (x_oldrev == oldrev) && + (x_oldid == oldid) && (x_rev == rev)) + return; /* protection against false positives */ + + decode_smsc(io,key,oldid,oldrev); } @@ -1584,7 +1627,8 @@ static int __devinit parport_ECP_supported(struct parport *pb) configb = inb (CONFIGB (pb)); if (!(configb & 0x40)) { - printk (KERN_WARNING "0x%lx: IRQ conflict!\n", pb->base); + printk (KERN_WARNING "0x%lx: possible IRQ conflict!\n", + pb->base); pb->irq = PARPORT_IRQ_NONE; } printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n", diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 50f24988128e..02766fa0d800 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -73,7 +73,7 @@ static void __init quirk_sb16audio_resources(struct pci_dev *dev) int changed = 0; /* - * The default range on 
the mtu port for these devices is 0x388-0x388. + * The default range on the mpu port for these devices is 0x388-0x388. * Here we increase that range so that two such cards can be * auto-configured. */ diff --git a/drivers/scsi/53c7,8xx.c b/drivers/scsi/53c7,8xx.c index 7049fa349063..9c6754924303 100644 --- a/drivers/scsi/53c7,8xx.c +++ b/drivers/scsi/53c7,8xx.c @@ -1396,7 +1396,7 @@ ncr_pci_init (Scsi_Host_Template *tpnt, int board, int chip, int i, irq; struct pci_dev *pdev = pci_find_slot(bus, device_fn); - printk("scsi-ncr53c7,8xx : at PCI bus %d, device %d, function %d\n", + printk("scsi-ncr53c7,8xx : at PCI bus %d, device %d, function %d\n", bus, (int) (device_fn & 0xf8) >> 3, (int) device_fn & 7); @@ -1406,10 +1406,8 @@ ncr_pci_init (Scsi_Host_Template *tpnt, int board, int chip, return -1; } - if ((error = pcibios_read_config_word (bus, device_fn, PCI_COMMAND, - &command)) || - (error = pcibios_read_config_byte (bus, device_fn, PCI_CLASS_REVISION, - &revision))) { + if ((error = pci_read_config_word (pdev, PCI_COMMAND, &command)) || + (error = pci_read_config_byte (pdev, PCI_CLASS_REVISION, &revision))) { printk ("scsi-ncr53c7,8xx : error %d not initializing due to error reading configuration space\n" " perhaps you specified an incorrect PCI bus, device, or function.\n", error); return -1; @@ -1451,24 +1449,21 @@ ncr_pci_init (Scsi_Host_Template *tpnt, int board, int chip, */ if (command & PCI_COMMAND_IO) { - if ((io_port & 3) != 1) { - printk ("scsi-ncr53c7,8xx : disabling I/O mapping since base address 0 (0x%x)\n" - " bits 0..1 indicate a non-IO mapping\n", - (unsigned) io_port); + if (!(pdev->resource[0].flags & IORESOURCE_IO)) { + printk ("scsi-ncr53c7,8xx : disabling I/O mapping since base " + "address 0\n contains a non-IO mapping\n"); io_port = 0; - } else - io_port &= PCI_BASE_ADDRESS_IO_MASK; + } } else { io_port = 0; } if (command & PCI_COMMAND_MEMORY) { - if ((base & PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) { - 
printk("scsi-ncr53c7,8xx : disabling memory mapping since base address 1\n" - " contains a non-memory mapping\n"); + if (!(pdev->resource[1].flags & IORESOURCE_MEM)) { + printk("scsi-ncr53c7,8xx : disabling memory mapping since base " + "address 1\n contains a non-memory mapping\n"); base = 0; - } else - base &= PCI_BASE_ADDRESS_MEM_MASK; + } } else { base = 0; } diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 7b2944b30ec4..ab12ef01d2d5 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -330,7 +330,7 @@ static unsigned char imm_wait(int host_no) static int imm_negotiate(imm_struct * tmp) { /* - * The following is supposedly the IEEE 1248-1994 negotiate + * The following is supposedly the IEEE 1284-1994 negotiate * sequence. I have yet to obtain a copy of the above standard * so this is a bit of a guess... * diff --git a/drivers/usb/Config.in b/drivers/usb/Config.in index 8df28dfdc087..012a27187edd 100644 --- a/drivers/usb/Config.in +++ b/drivers/usb/Config.in @@ -36,21 +36,26 @@ comment 'USB Devices' bool ' USB FTDI Single Port Serial Driver (EXPERIMENTAL)' CONFIG_USB_SERIAL_FTDI_SIO bool ' USB Keyspan PDA Single Port Serial Driver (EXPERIMENTAL)' CONFIG_USB_SERIAL_KEYSPAN_PDA fi + bool ' USB Serial Converter verbose debug' CONFIG_USB_SERIAL_DEBUG fi dep_tristate ' USB CPiA Camera support' CONFIG_USB_CPIA $CONFIG_USB dep_tristate ' USB IBM (Xirlink) C-it Camera support' CONFIG_USB_IBMCAM $CONFIG_USB dep_tristate ' USB OV511 Camera support' CONFIG_USB_OV511 $CONFIG_USB dep_tristate ' USB Kodak DC-2xx Camera support' CONFIG_USB_DC2XX $CONFIG_USB - dep_tristate ' USB Mass Storage support' CONFIG_USB_STORAGE $CONFIG_USB m - if [ "$CONFIG_USB_STORAGE" != "n" ]; then - bool ' USB Mass Storage verbose debug' CONFIG_USB_STORAGE_DEBUG + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' USB Mass Storage support (EXPERIMENTAL)' CONFIG_USB_STORAGE $CONFIG_USB m + if [ "$CONFIG_USB_STORAGE" != "n" ]; then + bool ' USB Mass Storage verbose debug' 
CONFIG_USB_STORAGE_DEBUG + fi fi dep_tristate ' USS720 parport driver' CONFIG_USB_USS720 $CONFIG_USB $CONFIG_PARPORT dep_tristate ' DABUSB driver' CONFIG_USB_DABUSB $CONFIG_USB - dep_tristate ' PLUSB Prolific USB-Network driver' CONFIG_USB_PLUSB $CONFIG_USB - dep_tristate ' USB ADMtek Pegasus-based device support' CONFIG_USB_PEGASUS $CONFIG_USB - dep_tristate ' USB Diamond Rio500 support' CONFIG_USB_RIO500 $CONFIG_USB - dep_tristate ' D-Link USB FM radio support' CONFIG_USB_DSBR $CONFIG_USB + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' PLUSB Prolific USB-Network driver (EXPERIMENTAL)' CONFIG_USB_PLUSB $CONFIG_USB + dep_tristate ' USB ADMtek Pegasus-based device support (EXPERIMENTAL)' CONFIG_USB_PEGASUS $CONFIG_USB + dep_tristate ' USB Diamond Rio500 support (EXPERIMENTAL)' CONFIG_USB_RIO500 $CONFIG_USB + dep_tristate ' D-Link USB FM radio support (EXPERIMENTAL)' CONFIG_USB_DSBR $CONFIG_USB + fi comment 'USB HID' dep_tristate ' USB Human Interface Device (HID) support' CONFIG_USB_HID $CONFIG_USB diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index 35bef0e45561..6e9e99714bff 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -45,7 +45,7 @@ ifeq ($(CONFIG_USB_SERIAL),y) obj-y += serial/serial.o else ifeq ($(CONFIG_USB_SERIAL),m) - MOD_SUB_DIRS += serial + MOD_IN_SUB_DIRS += serial endif endif diff --git a/drivers/usb/hub.c b/drivers/usb/hub.c index 9138ee4da12b..8bd919d679ff 100644 --- a/drivers/usb/hub.c +++ b/drivers/usb/hub.c @@ -4,8 +4,6 @@ * (C) Copyright 1999 Linus Torvalds * (C) Copyright 1999 Johannes Erdfelt * (C) Copyright 1999 Gregory P. Smith - * - * $Id: hub.c,v 1.21 2000/01/16 21:19:44 acher Exp $ */ #include @@ -75,31 +73,29 @@ static int usb_get_port_status(struct usb_device *dev, int port, void *data) * the low-level driver that it wants to be re-activated, * or zero to say "I'm done". 
*/ -static int hub_irq(int status, void *__buffer, int len, void *dev_id) +static void hub_irq(struct urb *urb) { - struct usb_hub *hub = dev_id; + struct usb_hub *hub = (struct usb_hub *)urb->context; unsigned long flags; - switch (status) { - case -ENODEV: - /* Just ignore it */ - break; - case 0: - /* Something happened, let khubd figure it out */ - if (waitqueue_active(&khubd_wait)) { - /* Add the hub to the event queue */ - spin_lock_irqsave(&hub_event_lock, flags); - if (hub->event_list.next == &hub->event_list) { - list_add(&hub->event_list, &hub_event_list); - /* Wake up khubd */ - wake_up(&khubd_wait); - } - spin_unlock_irqrestore(&hub_event_lock, flags); - } - break; + if (urb->status) { + if (urb->status != -ENOENT) + dbg("nonzero status in irq %d", urb->status); + + return; } - return 1; + /* Something happened, let khubd figure it out */ + if (waitqueue_active(&khubd_wait)) { + /* Add the hub to the event queue */ + spin_lock_irqsave(&hub_event_lock, flags); + if (hub->event_list.next == &hub->event_list) { + list_add(&hub->event_list, &hub_event_list); + /* Wake up khubd */ + wake_up(&khubd_wait); + } + spin_unlock_irqrestore(&hub_event_lock, flags); + } } static void usb_hub_power_on(struct usb_hub *hub) @@ -196,13 +192,14 @@ static int usb_hub_configure(struct usb_hub *hub) return 0; } -static void * hub_probe(struct usb_device *dev, unsigned int i) +static void *hub_probe(struct usb_device *dev, unsigned int i) { struct usb_interface_descriptor *interface; struct usb_endpoint_descriptor *endpoint; struct usb_hub *hub; unsigned long flags; - int ret; + unsigned int pipe; + int maxp, ret; interface = &dev->actconfig->interface[i].altsetting[0]; @@ -233,7 +230,8 @@ static void * hub_probe(struct usb_device *dev, unsigned int i) /* We found a hub */ info("USB hub found"); - if ((hub = kmalloc(sizeof(*hub), GFP_KERNEL)) == NULL) { + hub = kmalloc(sizeof(*hub), GFP_KERNEL); + if (!hub) { err("couldn't kmalloc hub struct"); return NULL; } @@ -250,26 
+248,24 @@ static void * hub_probe(struct usb_device *dev, unsigned int i) spin_unlock_irqrestore(&hub_event_lock, flags); if (usb_hub_configure(hub) >= 0) { - hub->irqpipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress); - ret = usb_request_irq(dev, hub->irqpipe, - hub_irq, endpoint->bInterval, - hub, &hub->irq_handle); - if (ret) { - err("usb_request_irq failed (%d)", ret); - /* free hub, but first clean up its list. */ - spin_lock_irqsave(&hub_event_lock, flags); + pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress); + maxp = usb_maxpacket(dev, pipe, usb_pipeout(pipe)); - /* Delete it and then reset it */ - list_del(&hub->event_list); - INIT_LIST_HEAD(&hub->event_list); - list_del(&hub->hub_list); - INIT_LIST_HEAD(&hub->hub_list); + if (maxp > sizeof(hub->buffer)) + maxp = sizeof(hub->buffer); - spin_unlock_irqrestore(&hub_event_lock, flags); - - kfree(hub); + hub->urb = usb_alloc_urb(0); + if (!hub->urb) { + err("couldn't allocate interrupt urb"); + goto fail; + } - return NULL; + FILL_INT_URB(hub->urb, dev, pipe, hub->buffer, maxp, hub_irq, + hub, endpoint->bInterval); + ret = usb_submit_urb(hub->urb); + if (ret) { + err("usb_submit_urb failed (%d)", ret); + goto fail; } /* Wake up khubd */ @@ -277,11 +273,27 @@ static void * hub_probe(struct usb_device *dev, unsigned int i) } return hub; + +fail: + /* free hub, but first clean up its list. 
*/ + spin_lock_irqsave(&hub_event_lock, flags); + + /* Delete it and then reset it */ + list_del(&hub->event_list); + INIT_LIST_HEAD(&hub->event_list); + list_del(&hub->hub_list); + INIT_LIST_HEAD(&hub->hub_list); + + spin_unlock_irqrestore(&hub_event_lock, flags); + + kfree(hub); + + return NULL; } static void hub_disconnect(struct usb_device *dev, void *ptr) { - struct usb_hub *hub = ptr; + struct usb_hub *hub = (struct usb_hub *)ptr; unsigned long flags; spin_lock_irqsave(&hub_event_lock, flags); @@ -294,8 +306,10 @@ static void hub_disconnect(struct usb_device *dev, void *ptr) spin_unlock_irqrestore(&hub_event_lock, flags); - if (hub->irq_handle) { - usb_release_irq(hub->dev, hub->irq_handle, hub->irqpipe); + if (hub->urb) { + usb_unlink_urb(hub->urb); + usb_free_urb(hub->urb); + hub->urb = NULL; } /* Free the memory */ diff --git a/drivers/usb/hub.h b/drivers/usb/hub.h index 0da7eb87c653..913c44a2d7ad 100644 --- a/drivers/usb/hub.h +++ b/drivers/usb/hub.h @@ -78,16 +78,10 @@ struct usb_hub_descriptor { __u8 bDescriptorType; __u8 bNbrPorts; __u16 wHubCharacteristics; -#if 0 - __u8 wHubCharacteristics[2]; /* __u16 but not aligned! 
*/ -#endif __u8 bPwrOn2PwrGood; __u8 bHubContrCurrent; /* DeviceRemovable and PortPwrCtrlMask want to be variable-length bitmaps that hold max 256 entries, but for now they're ignored */ -#if 0 - __u8 filler; -#endif } __attribute__ ((packed)); struct usb_device; @@ -112,9 +106,10 @@ struct usb_hub { /* Device structure */ struct usb_device *dev; - /* Reference to the hub's polling IRQ and its associated pipe */ - void *irq_handle; - unsigned int irqpipe; + /* Interrupt polling pipe */ + struct urb *urb; + + char buffer[USB_MAXCHILDREN / 8]; /* List of hubs */ struct list_head hub_list; diff --git a/drivers/usb/joydev.c b/drivers/usb/joydev.c index a5dcabdabfe9..9b54300e2f9c 100644 --- a/drivers/usb/joydev.c +++ b/drivers/usb/joydev.c @@ -224,8 +224,8 @@ static ssize_t joydev_read(struct file *file, char *buf, size_t count, loff_t *p struct JS_DATA_TYPE data; - data.buttons = (joydev->nkey > 0 && test_bit(joydev->keypam[0], input->key)) ? 1 : 0 | - (joydev->nkey > 1 && test_bit(joydev->keypam[1], input->key)) ? 2 : 0; + data.buttons = ((joydev->nkey > 0 && test_bit(joydev->keypam[0], input->key)) ? 1 : 0) | + ((joydev->nkey > 1 && test_bit(joydev->keypam[1], input->key)) ? 2 : 0); data.x = ((joydev_correct(input->abs[ABS_X], &joydev->corr[0]) / 256) + 128) >> joydev->glue.JS_CORR.x; data.y = ((joydev_correct(input->abs[ABS_Y], &joydev->corr[1]) / 256) + 128) >> joydev->glue.JS_CORR.y; diff --git a/drivers/usb/serial/Makefile b/drivers/usb/serial/Makefile index 20dc5dde5726..02bd7aad60f6 100644 --- a/drivers/usb/serial/Makefile +++ b/drivers/usb/serial/Makefile @@ -14,7 +14,7 @@ ALL_SUB_DIRS := $(SUB_DIRS) O_TARGET := serial.o M_OBJS := usb-serial.o O_OBJS := usb-serial.o -#MOD_LIST_NAME := USB_MODULES +MOD_LIST_NAME := USB_SERIAL_MODULES # Objects that export symbols. 
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index ad86c6a803f7..6effcc048cc8 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -14,6 +14,10 @@ * * See Documentation/usb/usb-serial.txt for more information on using this driver * + * (03/17/2000) gkh + * Added config option for debugging messages. + * Added patch for keyspan pda from Brian Warner. + * * (03/06/2000) gkh * Added the keyspan pda code from Brian Warner * Moved a bunch of the port specific stuff into its own structure. This @@ -175,7 +179,12 @@ #include #include #include -#define DEBUG + +#ifdef CONFIG_USB_SERIAL_DEBUG + #define DEBUG +#else + #undef DEBUG +#endif #include #ifdef CONFIG_USB_SERIAL_WHITEHEAT @@ -1428,6 +1437,7 @@ static void keyspan_pda_rx_interrupt (struct urb *urb) case 2: /* tx unthrottle interrupt */ serial->tx_throttled = 0; wake_up(&serial->write_wait); /* wake up writer */ + wake_up(&tty->write_wait); /* them too */ break; default: break; @@ -1846,25 +1856,12 @@ static int keyspan_pda_write_room (struct tty_struct *tty) static int keyspan_pda_chars_in_buffer (struct tty_struct *tty) { struct usb_serial *serial = (struct usb_serial *)tty->driver_data; - unsigned char count; - int rc; - - /* used by tty stuff to wait for output to drain. Go ask the - device how much is still queued in the tx ring */ - rc = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), - 6, /* write_room */ - USB_TYPE_VENDOR | USB_RECIP_INTERFACE - | USB_DIR_IN, - 1, /* value: 1 means chars_in_buffer */ - 0, /* index */ - &count, - 1, - 2*HZ); - if (rc < 0) - return rc; /* failed */ - if (rc == 0) - return -EIO; /* device didn't return any data */ - return (count); + + /* when throttled, return at least WAKEUP_CHARS to tell select() (via + n_tty.c:normal_poll() ) that we're not writeable. 
*/ + if (serial->tx_throttled) + return 256; + return 0; } diff --git a/drivers/usb/serial/usb-serial.h b/drivers/usb/serial/usb-serial.h index f02eeedf59f7..71fecb0ec27b 100644 --- a/drivers/usb/serial/usb-serial.h +++ b/drivers/usb/serial/usb-serial.h @@ -371,6 +371,7 @@ static struct usb_serial_device_type keyspan_pda_fake_device = { num_interrupt_in: NUM_DONT_CARE, num_bulk_in: NUM_DONT_CARE, num_bulk_out: NUM_DONT_CARE, + num_ports: 1, startup: keyspan_pda_fake_startup }; static struct usb_serial_device_type keyspan_pda_device = { diff --git a/drivers/usb/uhci.c b/drivers/usb/uhci.c index 480c6252df27..f5e67acec170 100644 --- a/drivers/usb/uhci.c +++ b/drivers/usb/uhci.c @@ -689,19 +689,18 @@ status_phase: return 0; td_error: - /* Some debugging code */ - if (debug) { + if (status & TD_CTRL_STALLED) + /* endpoint has stalled - mark it halted */ + usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), + uhci_packetout(td->info)); + else if (debug) { + /* Some debugging code */ dbg("uhci_result_control() failed with status %x", status); /* Print the chain for debugging purposes */ uhci_show_queue(urbp->qh); } - if (status & TD_CTRL_STALLED) - /* endpoint has stalled - mark it halted */ - usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), - uhci_packetout(td->info)); - return uhci_map_status(status, uhci_packetout(td->info)); } @@ -818,8 +817,12 @@ static int uhci_result_interrupt(urb_t *urb) return 0; td_error: - /* Some debugging code */ - if (debug) { + if (status & TD_CTRL_STALLED) + /* endpoint has stalled - mark it halted */ + usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), + uhci_packetout(td->info)); + else if (debug) { + /* Some debugging code */ dbg("uhci_result_interrupt/bulk() failed with status %x", status); @@ -830,11 +833,6 @@ td_error: uhci_show_td(td); } - if (status & TD_CTRL_STALLED) - /* endpoint has stalled - mark it halted */ - usb_endpoint_halt(urb->dev, uhci_endpoint(td->info), - uhci_packetout(td->info)); - return 
uhci_map_status(status, uhci_packetout(td->info)); } @@ -1251,12 +1249,14 @@ static int uhci_unlink_urb(urb_t *urb) uhci_unlink_generic(urb); if (urb->transfer_flags & USB_ASYNC_UNLINK) { + urb->status = -ECONNABORTED; + spin_lock_irqsave(&uhci->urb_remove_lock, flags); list_add(&urb->urb_list, &uhci->urb_remove_list); spin_unlock_irqrestore(&uhci->urb_remove_lock, flags); - - urb->status = -ECONNABORTED; } else { + urb->status = -ENOENT; + if (in_interrupt()) { /* wait at least 1 frame */ static int errorcount = 10; @@ -1268,8 +1268,6 @@ static int uhci_unlink_urb(urb_t *urb) if (urb->complete) urb->complete(urb); - - urb->status = -ENOENT; } } diff --git a/drivers/usb/usb-ohci.c b/drivers/usb/usb-ohci.c index cf457762a46b..78ae0a0ea883 100644 --- a/drivers/usb/usb-ohci.c +++ b/drivers/usb/usb-ohci.c @@ -252,6 +252,9 @@ static int sohci_submit_urb (urb_t * urb) if (urb->hcpriv) return -EINVAL; /* urb already in use */ +// if(usb_endpoint_halted (urb->dev, usb_pipeendpoint (pipe), usb_pipeout (pipe))) +// return -EPIPE; + usb_inc_dev_use (urb->dev); ohci = (ohci_t *) urb->dev->bus->hcpriv; @@ -838,28 +841,36 @@ static void td_submit_urb (urb_t * urb) int data_len = urb->transfer_buffer_length; int cnt = 0; __u32 info = 0; - + unsigned int toggle = 0; + /* OHCI handles the DATA-toggles itself, we just use the USB-toggle bits for reseting */ + if(usb_gettoggle(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe))) { + toggle = TD_T_TOGGLE; + } else { + toggle = TD_T_DATA0; + usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe), 1); + } + urb_priv->td_cnt = 0; switch (usb_pipetype (urb->pipe)) { case PIPE_BULK: info = usb_pipeout (urb->pipe)? - TD_CC | TD_DP_OUT | TD_T_TOGGLE: TD_CC | TD_DP_IN | TD_T_TOGGLE; + TD_CC | TD_DP_OUT : TD_CC | TD_DP_IN ; while(data_len > 4096) { - td_fill (info, data, 4096, urb, (cnt? 0: ST_ADDR) | ADD_LEN, cnt); + td_fill (info | (cnt? TD_T_TOGGLE:toggle), data, 4096, urb, (cnt? 
0: ST_ADDR) | ADD_LEN, cnt); data += 4096; data_len -= 4096; cnt++; } info = usb_pipeout (urb->pipe)? - TD_CC | TD_DP_OUT | TD_T_TOGGLE: TD_CC | TD_R | TD_DP_IN | TD_T_TOGGLE; - td_fill (info, data, data_len, urb, (cnt? 0: ST_ADDR) | ADD_LEN, cnt); + TD_CC | TD_DP_OUT : TD_CC | TD_R | TD_DP_IN ; + td_fill (info | (cnt? TD_T_TOGGLE:toggle), data, data_len, urb, (cnt? 0: ST_ADDR) | ADD_LEN, cnt); cnt++; writel (OHCI_BLF, &ohci->regs->cmdstatus); /* start bulk list */ break; case PIPE_INTERRUPT: info = usb_pipeout (urb->pipe)? - TD_CC | TD_DP_OUT | TD_T_TOGGLE: TD_CC | TD_R | TD_DP_IN | TD_T_TOGGLE; + TD_CC | TD_DP_OUT | toggle: TD_CC | TD_R | TD_DP_IN | toggle; td_fill (info, data, data_len, urb, ST_ADDR | ADD_LEN, cnt++); break; @@ -1059,6 +1070,8 @@ static void dl_done_list (ohci_t * ohci, td_t * td_list) } /* error code of transfer */ cc = TD_CC_GET (tdINFO); + if( cc == TD_CC_STALL) usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe)); + if (!(urb->transfer_flags & USB_DISABLE_SPD) && (cc == TD_DATAUNDERRUN)) cc = TD_CC_NOERROR; if (++(urb_priv->td_cnt) == urb_priv->length) { diff --git a/drivers/video/aty128fb.c b/drivers/video/aty128fb.c index fcc0f8c5cc86..50c1f0bda4e4 100644 --- a/drivers/video/aty128fb.c +++ b/drivers/video/aty128fb.c @@ -18,7 +18,6 @@ * - determine MCLK from previous setting -done for x86 * - calculate XCLK, rather than probe BIOS * - hardware cursor support - * - acceleration (do not use with Rage128 Pro!) 
* - ioctl()'s */ @@ -109,13 +108,13 @@ struct aty128_chip_info { /* supported Rage128 chipsets */ static const struct aty128_chip_info aty128_pci_probe_list[] __initdata = { - {"Rage128 RE (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RE}, - {"Rage128 RF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RF}, - {"Rage128 RK (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RK}, - {"Rage128 RL (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RL}, - {"Rage128 Pro PF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_PF}, - {"Rage128 Pro PR (PCI)", PCI_DEVICE_ID_ATI_RAGE128_PR}, - {NULL, 0} + { "Rage128 RE (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RE }, + { "Rage128 RF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RF }, + { "Rage128 RK (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RK }, + { "Rage128 RL (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RL }, + { "Rage128 Pro PF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_PF }, + { "Rage128 Pro PR (PCI)", PCI_DEVICE_ID_ATI_RAGE128_PR }, + { NULL, 0 } }; /* packed BIOS settings */ @@ -162,20 +161,20 @@ struct aty128_meminfo { }; /* various memory configurations */ -const struct aty128_meminfo sdr_128 = +static const struct aty128_meminfo sdr_128 = { 4, 4, 3, 3, 1, 3, 1, 16, 30, 16, "128-bit SDR SGRAM (1:1)" }; -const struct aty128_meminfo sdr_64 = +static const struct aty128_meminfo sdr_64 = { 4, 8, 3, 3, 1, 3, 1, 17, 46, 17, "64-bit SDR SGRAM (1:1)" }; -const struct aty128_meminfo sdr_sgram = +static const struct aty128_meminfo sdr_sgram = { 4, 4, 1, 2, 1, 2, 1, 16, 24, 16, "64-bit SDR SGRAM (2:1)" }; -const struct aty128_meminfo ddr_sgram = +static const struct aty128_meminfo ddr_sgram = { 4, 4, 3, 3, 2, 3, 1, 16, 31, 16, "64-bit DDR SGRAM" }; static int currcon = 0; static char *aty128fb_name = "ATY Rage128"; static char fontname[40] __initdata = { 0 }; -static char noaccel __initdata = 1; +static char noaccel __initdata = 0; static unsigned int initdepth __initdata = 8; #ifndef MODULE @@ -273,6 +272,7 @@ struct fb_info_aty128 { #ifdef CONFIG_MTRR struct { int vram; int vram_valid; } mtrr; #endif + int fifo_slots; /* free slots in FIFO (64 max) */ }; 
static struct fb_info_aty128 *board_list = NULL; @@ -344,7 +344,8 @@ static void aty128_init_engine(const struct aty128fb_par *par, struct fb_info_aty128 *info); static void aty128_reset_engine(const struct fb_info_aty128 *info); static void aty128_flush_pixel_cache(const struct fb_info_aty128 *info); -static void wait_for_fifo(u16 entries, const struct fb_info_aty128 *info); +static void do_wait_for_fifo(u16 entries, struct fb_info_aty128 *info); +static void wait_for_fifo(u16 entries, struct fb_info_aty128 *info); static void wait_for_idle(struct fb_info_aty128 *info); static u32 bpp_to_depth(u32 bpp); @@ -483,7 +484,8 @@ _aty_st_pll(unsigned int pll_index, u32 val, aty_st_8(CLOCK_CNTL_INDEX, (pll_index & 0x1F) | PLL_WR_EN); aty_st_le32(CLOCK_CNTL_DATA, val); } - + + /* return true when the PLL has completed an atomic update */ static int aty_pll_readupdate(const struct fb_info_aty128 *info) @@ -547,52 +549,64 @@ register_test(const struct fb_info_aty128 *info) * Accelerator engine functions */ static void -wait_for_idle(struct fb_info_aty128 *info) +do_wait_for_fifo(u16 entries, struct fb_info_aty128 *info) { - unsigned long timeout = jiffies + HZ/20; - int reset = 1; - - wait_for_fifo(64, info); - - while (time_before(jiffies, timeout)) - if ((aty_ld_le32(GUI_STAT) & GUI_ACTIVE) != ENGINE_IDLE) { - reset = 0; - break; - } + int i; - if (reset) + for (;;) { + for (i = 0; i < 2000000; i++) { + info->fifo_slots = aty_ld_le32(GUI_STAT) & 0x0fff; + if (info->fifo_slots >= entries) + return; + } aty128_reset_engine(info); - - info->blitter_may_be_busy = 0; + } } static void -wait_for_fifo(u16 entries, const struct fb_info_aty128 *info) +wait_for_idle(struct fb_info_aty128 *info) { - unsigned long timeout = jiffies + HZ/20; - int reset = 1; + int i; - while (time_before(jiffies, timeout)) - if ((aty_ld_le32(GUI_STAT) & 0x00000FFF) < entries) { - reset = 0; - break; - } + do_wait_for_fifo(64, info); - if (reset) - aty128_reset_engine(info); + for (;;) { + for (i = 0; i 
< 2000000; i++) { + if (!(aty_ld_le32(GUI_STAT) & (1 << 31))) { + aty128_flush_pixel_cache(info); + info->blitter_may_be_busy = 0; + return; + } + } + aty128_reset_engine(info); + } +} + + +static void +wait_for_fifo(u16 entries, struct fb_info_aty128 *info) +{ + if (info->fifo_slots < entries) + do_wait_for_fifo(64, info); + info->fifo_slots -= entries; } static void aty128_flush_pixel_cache(const struct fb_info_aty128 *info) { - int i = 16384; + int i; + u32 tmp; - aty_st_le32(PC_NGUI_CTLSTAT, aty_ld_le32(PC_NGUI_CTLSTAT) | 0x000000ff); + tmp = aty_ld_le32(PC_NGUI_CTLSTAT); + tmp &= ~(0x00ff); + tmp |= 0x00ff; + aty_st_le32(PC_NGUI_CTLSTAT, tmp); - while (i && ((aty_ld_le32(PC_NGUI_CTLSTAT) & PC_BUSY) == PC_BUSY)) - i--; + for (i = 0; i < 2000000; i++) + if (!(aty_ld_le32(PC_NGUI_CTLSTAT) & PC_BUSY)) + break; } @@ -798,7 +812,7 @@ aty128_var_to_crtc(const struct fb_var_screeninfo *var, return -EINVAL; } - h_disp = (xres/8) - 1; + h_disp = (xres >> 3) - 1; h_total = (((xres + right + hslen + left) / 8) - 1) & 0xFFFFL; v_disp = yres - 1; @@ -1485,7 +1499,7 @@ aty128_encode_fix(struct fb_fix_screeninfo *fix, fix->type = FB_TYPE_PACKED_PIXELS; fix->type_aux = 0; - fix->line_length = par->crtc.vxres*par->crtc.bpp/8; + fix->line_length = par->crtc.vxres*par->crtc.bpp >> 3; fix->visual = par->crtc.bpp <= 8 ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR; fix->ywrapstep = 0; @@ -1662,7 +1676,7 @@ aty128fb_setup(char *options) else if(!strncmp(this_opt, "nomtrr", 6)) { mtrr = 0; } -#endif /* CONFIG_MTRR */ +#endif #ifdef CONFIG_PPC /* vmode and cmode depreciated */ else if (!strncmp(this_opt, "vmode:", 6)) { @@ -1791,7 +1805,7 @@ aty128_init(struct fb_info_aty128 *info, const char *name) dac = aty_ld_le32(DAC_CNTL); dac |= (DAC_8BIT_EN | DAC_RANGE_CNTL | DAC_BLANKING); - dac |= DAC_MASK; /* set DAC mask */ + dac |= DAC_MASK; aty_st_le32(DAC_CNTL, dac); /* turn off bus mastering, just in case */ @@ -2309,22 +2323,22 @@ aty128_rectcopy(int srcx, int srcy, int dstx, int dsty, wait_for_fifo(2, info); save_dp_datatype = aty_ld_le32(DP_DATATYPE); - save_dp_cntl = aty_ld_le32(DP_CNTL); + save_dp_cntl = aty_ld_le32(DP_CNTL); wait_for_fifo(6, info); - aty_st_le32(DP_DATATYPE, (0 | BRUSH_SOLIDCOLOR << 16) | SRC_DSTCOLOR); + aty_st_le32(DP_DATATYPE, (BRUSH_SOLIDCOLOR << 16) | SRC_DSTCOLOR); aty_st_le32(DP_MIX, ROP3_SRCCOPY | DP_SRC_RECT); aty_st_le32(DP_CNTL, DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM); aty_st_le32(SRC_Y_X, (srcy << 16) | srcx); aty_st_le32(DST_Y_X, (dsty << 16) | dstx); aty_st_le32(DST_HEIGHT_WIDTH, (height << 16) | width); + info->blitter_may_be_busy = 1; + wait_for_fifo(2, info); aty_st_le32(DP_DATATYPE, save_dp_datatype); aty_st_le32(DP_CNTL, save_dp_cntl); - info->blitter_may_be_busy = 1; - wait_for_idle(info); } @@ -2333,16 +2347,15 @@ aty128_rectcopy(int srcx, int srcy, int dstx, int dsty, * Text mode accelerated functions */ - static void fbcon_aty128_bmove(struct display *p, int sy, int sx, int dy, int dx, int height, int width) { - sx *= fontwidth(p); - sy *= fontheight(p); - dx *= fontwidth(p); - dy *= fontheight(p); - width *= fontwidth(p); + sx *= fontwidth(p); + sy *= fontheight(p); + dx *= fontwidth(p); + dy *= fontheight(p); + width *= fontwidth(p); height *= fontheight(p); aty128_rectcopy(sx, sy, dx, dy, width, height, diff --git a/fs/binfmt_aout.c 
b/fs/binfmt_aout.c index 5aa2f1b351d8..9339775cee6a 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -30,7 +30,7 @@ #include static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_aout_library(int fd); +static int load_aout_library(struct file*); static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); extern void dump_thread(struct pt_regs *, struct user *); @@ -414,9 +414,8 @@ beyond_if: return 0; } -static int load_aout_library(int fd) +static int load_aout_library(struct file *file) { - struct file * file; struct inode * inode; unsigned long bss, start_addr, len; unsigned long error; @@ -424,12 +423,6 @@ static int load_aout_library(int fd) loff_t offset = 0; struct exec ex; - retval = -EACCES; - file = fget(fd); - if (!file) - goto out; - if (!file->f_op) - goto out_putf; inode = file->f_dentry->d_inode; retval = -ENOEXEC; @@ -438,17 +431,17 @@ static int load_aout_library(int fd) error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset); set_fs(USER_DS); if (error != sizeof(ex)) - goto out_putf; + goto out; /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { - goto out_putf; + goto out; } if (N_FLAGS(ex)) - goto out_putf; + goto out; /* For QMAGIC, the starting address is 0x20 into the page. We mask this off to get the starting address for the page */ @@ -474,16 +467,18 @@ static int load_aout_library(int fd) (unsigned long) start_addr + ex.a_text + ex.a_data); retval = 0; - goto out_putf; + goto out; } /* Now use mmap to map the library into memory. 
*/ + down(¤t->mm->mmap_sem); error = do_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, N_TXTOFF(ex)); + up(¤t->mm->mmap_sem); retval = error; if (error != start_addr) - goto out_putf; + goto out; len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; @@ -491,12 +486,9 @@ static int load_aout_library(int fd) error = do_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) - goto out_putf; + goto out; } retval = 0; - -out_putf: - fput(file); out: return retval; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 20faef169538..fcf1d111fc2d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -40,7 +40,7 @@ #include static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs); -static int load_elf_library(int fd); +static int load_elf_library(struct file*); extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); extern void dump_thread(struct pt_regs *, struct user *); @@ -787,9 +787,8 @@ out_free_ph: /* This is really simpleminded and specialized - we are loading an a.out library that is given an ELF header. 
*/ -static int load_elf_library(int fd) +static int load_elf_library(struct file *file) { - struct file * file; struct dentry * dentry; struct inode * inode; struct elf_phdr *elf_phdata; @@ -799,9 +798,6 @@ static int load_elf_library(int fd) loff_t offset = 0; error = -EACCES; - file = fget(fd); - if (!file || !file->f_op) - goto out; dentry = file->f_dentry; inode = dentry->d_inode; @@ -813,27 +809,27 @@ static int load_elf_library(int fd) retval = file->f_op->read(file, (char *) &elf_ex, sizeof(elf_ex), &offset); set_fs(USER_DS); if (retval != sizeof(elf_ex)) - goto out_putf; + goto out; if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - goto out_putf; + goto out; /* First of all, some simple consistency checks */ if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || !elf_check_arch(elf_ex.e_machine) || (!inode->i_fop || !inode->i_fop->mmap)) - goto out_putf; + goto out; /* Now read in all of the header information */ j = sizeof(struct elf_phdr) * elf_ex.e_phnum; if (j > ELF_EXEC_PAGESIZE) - goto out_putf; + goto out; error = -ENOMEM; elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL); if (!elf_phdata) - goto out_putf; + goto out; /* N.B. check for error return?? */ retval = read_exec(dentry, elf_ex.e_phoff, (char *) elf_phdata, @@ -848,6 +844,7 @@ static int load_elf_library(int fd) while (elf_phdata->p_type != PT_LOAD) elf_phdata++; /* Now use mmap to map the library into memory. 
*/ + down(¤t->mm->mmap_sem); error = do_mmap(file, ELF_PAGESTART(elf_phdata->p_vaddr), (elf_phdata->p_filesz + @@ -856,6 +853,7 @@ static int load_elf_library(int fd) MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, (elf_phdata->p_offset - ELF_PAGEOFFSET(elf_phdata->p_vaddr))); + up(¤t->mm->mmap_sem); if (error != ELF_PAGESTART(elf_phdata->p_vaddr)) goto out_free_ph; @@ -873,8 +871,6 @@ static int load_elf_library(int fd) out_free_ph: kfree(elf_phdata); -out_putf: - fput(file); out: return error; } diff --git a/fs/exec.c b/fs/exec.c index 19a85f925c64..9c7a5174610e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -165,14 +165,12 @@ asmlinkage long sys_uselib(const char * library) if (file && file->f_dentry && file->f_op && file->f_op->read) { spin_lock(&binfmt_lock); for (fmt = formats ; fmt ; fmt = fmt->next) { - int (*fn)(int) = fmt->load_shlib; - if (!fn) + if (!fmt->load_shlib) continue; if (!try_inc_mod_count(fmt->module)) continue; spin_unlock(&binfmt_lock); - /* N.B. Should use file instead of fd */ - retval = fn(fd); + retval = fmt->load_shlib(file); spin_lock(&binfmt_lock); put_binfmt(fmt); if (retval != -ENOEXEC) diff --git a/fs/namei.c b/fs/namei.c index f57cb2a542d4..b60676b6a682 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -450,6 +450,76 @@ no_inode: return dentry; } +/* + * Restricted form of lookup. Doesn't follow links, single-component only, + * needs parent already locked. Doesn't follow mounts. 
+ */ +struct dentry * lookup_one(const char * name, struct dentry * base) +{ + struct dentry * dentry; + struct inode *inode; + int err; + unsigned long hash; + struct qstr this; + unsigned int c; + + inode = base->d_inode; + err = permission(inode, MAY_EXEC); + dentry = ERR_PTR(err); + if (err) + goto out; + + this.name = name; + c = *(const unsigned char *)name; + if (!c) + goto access; + + hash = init_name_hash(); + do { + name++; + if (c == '/') + goto access; + hash = partial_name_hash(c, hash); + c = *(const unsigned char *)name; + } while (c); + this.len = name - (const char *) this.name; + this.hash = end_name_hash(hash); + + /* + * See if the low-level filesystem might want + * to use its own hash.. + */ + if (base->d_op && base->d_op->d_hash) { + err = base->d_op->d_hash(base, &this); + dentry = ERR_PTR(err); + if (err < 0) + goto out; + } + + dentry = cached_lookup(base, &this, 0); + if (!dentry) { + struct dentry *new = d_alloc(base, &this); + dentry = ERR_PTR(-ENOMEM); + if (!new) + goto out; + dentry = inode->i_op->lookup(inode, new); + if (!dentry) + dentry = new; + else { + dput(new); + if (IS_ERR(dentry)) + goto out; + } + } + +out: + dput(base); + return dentry; +access: + dentry = ERR_PTR(-EACCES); + goto out; +} + /* * namei() * diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7dc5739eb64e..6e102db9cc30 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -727,25 +727,40 @@ encode_entry(struct readdir_cd *cd, const char *name, /* throw in readdirplus baggage */ if (plus) { struct svc_fh fh; + struct svc_export *exp; + struct dentry *dparent, *dchild; + + dparent = cd->dirfh->fh_dentry; + exp = cd->dirfh->fh_export; fh_init(&fh, NFS3_FHSIZE); - /* Disabled for now because of lock-up */ - if (0 && nfsd_lookup(cd->rqstp, cd->dirfh, name, namlen, &fh) == 0) { - p = encode_post_op_attr(cd->rqstp, p, fh.fh_dentry); - p = encode_fh(p, &fh); - fh_put(&fh); - } else { - /* Didn't find this entry... weird. 
- * Proceed without the attrs anf fh anyway. - */ - *p++ = 0; - *p++ = 0; - } + if (fh_verify(cd->rqstp, cd->dirfh, S_IFDIR, MAY_EXEC) != 0) + goto noexec; + if (isdotent(name, namlen)) { + dchild = dparent; + if (namlen == 2) + dchild = dchild->d_parent; + dchild = dget(dchild); + } else + dchild = lookup_one(name, dget(dparent)); + if (IS_ERR(dchild)) + goto noexec; + if (fh_compose(&fh, exp, dchild) != 0 || !dchild->d_inode) + goto noexec; + p = encode_post_op_attr(cd->rqstp, p, fh.fh_dentry); + p = encode_fh(p, &fh); + fh_put(&fh); } +out: cd->buflen = buflen; cd->buffer = p; return 0; + +noexec: + *p++ = 0; + *p++ = 0; + goto out; } int diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 7baab32d9532..a5fcdcf7d73e 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -147,7 +147,7 @@ static struct dentry *nfsd_iget(struct super_block *sb, unsigned long ino, __u32 generation); iput(inode); - return NULL; + return ERR_PTR(-ESTALE); } /* now to find a dentry. * If possible, get a well-connected one @@ -353,10 +353,6 @@ find_fh_dentry(struct super_block *sb, ino_t ino, int generation, ino_t dirino, if (IS_ERR(result)) goto err_out; err = -ESTALE; - if (!result) { - dprintk("find_fh_dentry: No inode found.\n"); - goto err_out; - } if (! (result->d_flags & DCACHE_NFSD_DISCONNECTED)) return result; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 7d570299f2a4..6f69225cc97b 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -195,6 +195,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, svc_fh *newfhp = &resp->fh; struct iattr *attr = &argp->attrs; struct inode *inode; + struct dentry *dchild; int nfserr, type, mode, rdonly = 0; dev_t rdev = NODEV; @@ -214,14 +215,24 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, } else if (nfserr) goto done; - /* - * Do a lookup to verify the new file handle. 
- */ + nfserr = nfserr_acces; + if (!argp->len) + goto done; + nfserr = nfserr_exist; + if (isdotent(argp->name, argp->len)) + goto done; + fh_lock(dirfhp); + dchild = lookup_one(argp->name, dget(dirfhp->fh_dentry)); + nfserr = nfserrno(PTR_ERR(dchild)); + if (IS_ERR(dchild)) + goto out_unlock; fh_init(newfhp, NFS_FHSIZE); - nfserr = nfsd_lookup(rqstp, dirfhp, argp->name, argp->len, newfhp); + nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild); + if (!nfserr && !dchild->d_inode) + nfserr = nfserr_noent; if (nfserr) { if (nfserr != nfserr_noent) - goto done; + goto out_unlock; /* * If the new file handle wasn't verified, we can't tell * whether the file exists or not. Time to bail ... @@ -230,22 +241,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, if (!newfhp->fh_dverified) { printk(KERN_WARNING "nfsd_proc_create: file handle not verified\n"); - goto done; + goto out_unlock; } } - /* - * Lock the parent directory and check for existence. - */ - nfserr = fh_lock_parent(dirfhp, newfhp->fh_dentry); - if (nfserr) - goto done; inode = newfhp->fh_dentry->d_inode; - if (inode && newfhp->fh_handle.fh_fileid_type == 0) - /* inode might have been instantiated while we slept */ - nfserr = fh_update(newfhp); - if (nfserr) - goto done; /* Unfudge the mode bits */ if (attr->ia_valid & ATTR_MODE) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 06a795841241..e3be271a299c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -52,9 +52,6 @@ */ #define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i)) -/* Check for dir entries '.' and '..' */ -#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) - /* * This is a cache of readahead params that help us choose the proper * readahead strategy. 
Initially, we set all readahead parameters to 0 @@ -77,47 +74,6 @@ struct raparms { static struct raparms * raparml = NULL; static struct raparms * raparm_cache = NULL; - -/* - * We need to do a check-parent every time - * after we have locked the parent - to verify - * that the parent is still our parent and - * that we are still hashed onto it.. - * - * This is required in case two processes race - * on removing (or moving) the same entry: the - * parent lock will serialize them, but the - * other process will be too late.. - * - * Note that this nfsd_check_parent is identical - * the check_parent in linux/fs/namei.c. - */ -#define nfsd_check_parent(dir, dentry) \ - ((dir) == (dentry)->d_parent && !d_unhashed(dentry)) - -/* - * Lock a parent directory following the VFS locking protocol. - */ -int -fh_lock_parent(struct svc_fh *parent_fh, struct dentry *dchild) -{ - fh_lock(parent_fh); - /* - * Make sure the parent->child relationship still holds, - * and that the child is still hashed. - */ - if (nfsd_check_parent(parent_fh->fh_dentry, dchild)) - return 0; - - printk(KERN_WARNING - "fh_lock_parent: %s/%s parent changed or child unhashed\n", - dchild->d_parent->d_name.name, dchild->d_name.name); - - fh_unlock(parent_fh); - return nfserr_noent; -} - - /* * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put @@ -156,35 +112,57 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, err = nfserr_acces; /* Lookup the name, but don't follow links */ - if (strcmp(name,"..")==0 && dparent->d_covers != dparent) - dchild = dget(dparent); - else + if (strcmp(name, "..")==0) { + /* checking mountpoint crossing is very different when stepping up */ + if (dparent == exp->ex_dentry) { + if (!EX_CROSSMNT(exp)) + dchild = dget(dparent); /* .. == . 
just like at / */ + else + { + struct svc_export *exp2 = NULL; + struct dentry *dp; + dchild = dparent->d_covers->d_parent; + for (dp=dchild; + exp2 == NULL && dp->d_covers->d_parent != dp; + dp=dp->d_covers->d_parent) + exp2 = exp_get(exp->ex_client, dp->d_inode->i_dev, dp->d_inode->i_ino); + if (exp2==NULL || dchild->d_sb != exp2->ex_dentry->d_sb) { + dchild = dget(dparent); + } else { + dget(dchild); + exp = exp2; + } + } + } else + dchild = dget(dparent->d_parent); + } else { dchild = lookup_dentry(name, dget(dparent), 0); - if (IS_ERR(dchild)) - goto out_nfserr; - /* - * check if we have crossed a mount point ... - */ - if (dchild->d_sb != dparent->d_sb) { - struct svc_export *exp2 = NULL; - exp2 = exp_get(rqstp->rq_client, - dchild->d_inode->i_dev, - dchild->d_inode->i_ino); - if (exp2 && EX_CROSSMNT(exp2)) - /* successfully crossed mount point */ - exp = exp2; - else if (dchild->d_covers->d_sb == dparent->d_sb) { - /* stay in the original filesystem */ - struct dentry *tdentry = dget(dchild->d_covers); - dput(dchild); - dchild = tdentry; - } else { - /* This cannot possibly happen */ - printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name); - dput(dchild); - err = nfserr_acces; - goto out; + if (IS_ERR(dchild)) + goto out_nfserr; + /* + * check if we have crossed a mount point ... 
+ */ + if (dchild->d_sb != dparent->d_sb) { + struct svc_export *exp2 = NULL; + exp2 = exp_get(rqstp->rq_client, + dchild->d_inode->i_dev, + dchild->d_inode->i_ino); + if (exp2 && EX_CROSSMNT(exp2)) + /* successfully crossed mount point */ + exp = exp2; + else if (dchild->d_covers->d_sb == dparent->d_sb) { + /* stay in the original filesystem */ + struct dentry *tdentry = dget(dchild->d_covers); + dput(dchild); + dchild = tdentry; + } else { + /* This cannot possibly happen */ + printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name); + dput(dchild); + err = nfserr_acces; + goto out; + } } } /* @@ -216,6 +194,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) int imode; int err; kernel_cap_t saved_cap = 0; + int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= MAY_WRITE; @@ -305,14 +284,31 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) saved_cap = current->cap_effective; cap_clear(current->cap_effective); } +#ifdef CONFIG_QUOTA + /* DQUOT_TRANSFER needs both ia_uid and ia_gid defined */ + if (iap->ia_valid & (ATTR_UID|ATTR_GID)) { + if (! (iap->ia_valid & ATTR_UID)) + iap->ia_uid = inode->i_uid; + if (! 
(iap->ia_valid & ATTR_GID)) + iap->ia_gid = inode->i_gid; + iap->ia_valid |= ATTR_UID|ATTR_GID; + } +#endif /* CONFIG_QUOTA */ + if (iap->ia_valid & ATTR_SIZE) { fh_lock(fhp); + size_change = 1; + } +#ifdef CONFIG_QUOTA + if (iap->ia_valid & (ATTR_UID|ATTR_GID)) + err = DQUOT_TRANSFER(dentry, iap); + else +#endif err = notify_change(dentry, iap); + if (size_change) { fh_unlock(fhp); put_write_access(inode); } - else - err = notify_change(dentry, iap); if (current->fsuid != 0) current->cap_effective = saved_cap; if (err) @@ -647,11 +643,11 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, uid_t saved_euid; #endif - if (!cnt) - goto out; err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); if (err) goto out; + if (!cnt) + goto out_close; err = nfserr_perm; if (!file.f_op->write) goto out_close; @@ -812,6 +808,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_perm; if (!flen) goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); if (err) @@ -829,14 +828,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ if (!resfhp->fh_dverified) { /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ - dchild = lookup_dentry(fname, dget(dentry), 0); + fh_lock(fhp); + dchild = lookup_one(fname, dget(dentry)); err = PTR_ERR(dchild); if (IS_ERR(dchild)) goto out_nfserr; - /* Lock the parent and check for errors ... 
*/ - err = fh_lock_parent(fhp, dchild); - if (err) - goto out; err = fh_compose(resfhp, fhp->fh_export, dchild); if (err) goto out; @@ -934,6 +930,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_perm; if (!flen) goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; if (!(iap->ia_valid & ATTR_MODE)) iap->ia_mode = 0; err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); @@ -948,21 +947,16 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_notdir; if(!dirp->i_op || !dirp->i_op->lookup) goto out; + fh_lock(fhp); /* * Compose the response file handle. */ - dchild = lookup_dentry(fname, dget(dentry), 0); + dchild = lookup_one(fname, dget(dentry)); err = PTR_ERR(dchild); if(IS_ERR(dchild)) goto out_nfserr; - /* - * We must lock the directory before we check for the inode. - */ - err = fh_lock_parent(fhp, dchild); - if (err) - goto out; err = fh_compose(resfhp, fhp->fh_export, dchild); if (err) goto out; @@ -1096,24 +1090,20 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_noent; if (!flen || !plen) goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); if (err) goto out; + fh_lock(fhp); dentry = fhp->fh_dentry; - - dnew = lookup_dentry(fname, dget(dentry), 0); + dnew = lookup_one(fname, dget(dentry)); err = PTR_ERR(dnew); if (IS_ERR(dnew)) goto out_nfserr; - /* - * Lock the parent before checking for existence - */ - err = fh_lock_parent(fhp, dnew); - if (err) - goto out_compose; - err = vfs_symlink(dentry->d_inode, dnew, path); if (!err) { if (EX_ISSYNC(fhp->fh_export)) @@ -1134,7 +1124,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_unlock(fhp); /* Compose the fh so the dentry will be freed ... 
*/ -out_compose: cerr = fh_compose(resfhp, fhp->fh_export, dnew); if (err==0) err = cerr; out: @@ -1167,20 +1156,18 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserr_perm; if (!len) goto out; + err = nfserr_exist; + if (isdotent(fname, len)) + goto out; + fh_lock(ffhp); ddir = ffhp->fh_dentry; dirp = ddir->d_inode; - dnew = lookup_dentry(fname, dget(ddir), 0); + dnew = lookup_one(fname, dget(ddir)); err = PTR_ERR(dnew); if (IS_ERR(dnew)) goto out_nfserr; - /* - * Lock the parent before checking for existence - */ - err = fh_lock_parent(ffhp, dnew); - if (err) - goto out_dput; dold = tfhp->fh_dentry; dest = dold->d_inode; @@ -1199,7 +1186,6 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, } fh_unlock(ffhp); -out_dput: dput(dnew); out: return err; @@ -1209,29 +1195,6 @@ out_nfserr: goto out; } -/* - * This follows the model of double_lock() in the VFS. - */ -static inline void nfsd_double_down(struct semaphore *s1, struct semaphore *s2) -{ - if (s1 != s2) { - if ((unsigned long) s1 < (unsigned long) s2) { - struct semaphore *tmp = s1; - s1 = s2; - s2 = tmp; - } - down(s1); - } - down(s2); -} - -static inline void nfsd_double_up(struct semaphore *s1, struct semaphore *s2) -{ - up(s1); - if (s1 != s2) - up(s2); -} - /* * Rename a file * N.B. After this call _both_ ffhp and tfhp need an fh_put @@ -1261,15 +1224,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (fdir->i_dev != tdir->i_dev) goto out; - /* N.B. We shouldn't need this ... dentry layer handles it */ err = nfserr_perm; - if (!flen || (fname[0] == '.' && - (flen == 1 || (flen == 2 && fname[1] == '.'))) || - !tlen || (tname[0] == '.' 
&& - (tlen == 1 || (tlen == 2 && tname[1] == '.')))) + if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; - odentry = lookup_dentry(fname, dget(fdentry), 0); + double_down(&tdir->i_sem, &fdir->i_sem); + odentry = lookup_one(fname, dget(fdentry)); err = PTR_ERR(odentry); if (IS_ERR(odentry)) goto out_nfserr; @@ -1278,16 +1238,11 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!odentry->d_inode) goto out_dput_old; - ndentry = lookup_dentry(tname, dget(tdentry), 0); + ndentry = lookup_one(tname, dget(tdentry)); err = PTR_ERR(ndentry); if (IS_ERR(ndentry)) goto out_dput_old; - /* - * Lock the parent directories. - */ - nfsd_double_down(&tdir->i_sem, &fdir->i_sem); - #ifdef CONFIG_NFSD_V3 /* Fill in the pre-op attr for the wcc data for both * tdir and fdir @@ -1296,19 +1251,11 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fill_pre_wcc(tfhp); #endif /* CONFIG_NFSD_V3 */ - err = -ENOENT; - /* GAM3 check for parent changes after locking. 
*/ - if (nfsd_check_parent(fdentry, odentry) && - nfsd_check_parent(tdentry, ndentry)) { - - err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); - } - } else - dprintk("nfsd: Caught race in nfsd_rename"); - + err = vfs_rename(fdir, odentry, tdir, ndentry); + if (!err && EX_ISSYNC(tfhp->fh_export)) { + nfsd_sync_dir(tdentry); + nfsd_sync_dir(fdentry); + } #ifdef CONFIG_NFSD_V3 /* Fill in the post-op attr for the wcc data for both * tdir and fdir @@ -1316,7 +1263,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fill_post_wcc(ffhp); fill_post_wcc(tfhp); #endif /* CONFIG_NFSD_V3 */ - nfsd_double_up(&tdir->i_sem, &fdir->i_sem); + double_up(&tdir->i_sem, &fdir->i_sem); dput(ndentry); out_dput_old: @@ -1343,7 +1290,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, struct inode *dirp; int err; - /* N.B. We shouldn't need this test ... handled by dentry layer */ err = nfserr_acces; if (!flen || isdotent(fname, flen)) goto out; @@ -1351,10 +1297,11 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (err) goto out; + fh_lock(fhp); dentry = fhp->fh_dentry; dirp = dentry->d_inode; - rdentry = lookup_dentry(fname, dget(dentry), 0); + rdentry = lookup_one(fname, dget(dentry)); err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) goto out_nfserr; @@ -1365,12 +1312,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, goto out; } - err = fh_lock_parent(fhp, rdentry); - if (err) { - dput(rdentry); - goto out; - } - if (type != S_IFDIR) { /* It's UNLINK */ err = vfs_unlink(dirp, rdentry); } else { /* It's RMDIR */ @@ -1436,6 +1377,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, * may choose to do less. 
*/ inode = file.f_dentry->d_inode; + down(&inode->i_sem); while (1) { oldlen = cd.buflen; @@ -1444,9 +1386,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, file.f_inode->i_dev, file.f_inode->i_ino, (int) file.f_pos, (int) oldlen, (int) cd.buflen); */ - down(&inode->i_sem); err = file.f_op->readdir(&file, &cd, (filldir_t) func); - up(&inode->i_sem); if (err < 0) goto out_nfserr; if (oldlen == cd.buflen) @@ -1454,6 +1394,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, if (cd.eob) break; } + up(&inode->i_sem); /* If we didn't fill the buffer completely, we're at EOF */ eof = !cd.eob; @@ -1482,6 +1423,7 @@ out: return err; out_nfserr: + up(&inode->i_sem); err = nfserrno(err); goto out_close; } diff --git a/fs/super.c b/fs/super.c index a0fe6be7b226..dd34ddc70103 100644 --- a/fs/super.c +++ b/fs/super.c @@ -581,7 +581,6 @@ out_fail: s->s_dev = 0; s->s_bdev = 0; s->s_type = NULL; - put_filesystem(type); unlock_super(s); return NULL; } @@ -1245,7 +1244,7 @@ void __init mount_root(void) * devfs crap and checking it right now. Later. 
*/ if (!ROOT_DEV) - panic("I have no root and I want to sream"); + panic("I have no root and I want to scream"); bdev = bdget(kdev_t_to_nr(ROOT_DEV)); if (!bdev) diff --git a/include/asm-alpha/core_apecs.h b/include/asm-alpha/core_apecs.h index 927aa2ea7f34..fee9f3c13724 100644 --- a/include/asm-alpha/core_apecs.h +++ b/include/asm-alpha/core_apecs.h @@ -511,32 +511,27 @@ __EXTERN_INLINE int apecs_is_ioaddr(unsigned long addr) #ifdef __WANT_IO_DEF -#define __inb apecs_inb -#define __inw apecs_inw -#define __inl apecs_inl -#define __outb apecs_outb -#define __outw apecs_outw -#define __outl apecs_outl -#define __readb apecs_readb -#define __readw apecs_readw -#define __readl apecs_readl -#define __readq apecs_readq -#define __writeb apecs_writeb -#define __writew apecs_writew -#define __writel apecs_writel -#define __writeq apecs_writeq -#define __ioremap apecs_ioremap -#define __is_ioaddr apecs_is_ioaddr - -#define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -#define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) - -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __inb(p) apecs_inb((unsigned long)(p)) +#define __inw(p) apecs_inw((unsigned long)(p)) +#define __inl(p) apecs_inl((unsigned long)(p)) +#define __outb(x,p) apecs_outb((x),(unsigned long)(p)) +#define __outw(x,p) apecs_outw((x),(unsigned long)(p)) +#define __outl(x,p) apecs_outl((x),(unsigned long)(p)) +#define __readb(a) apecs_readb((unsigned long)(a)) +#define __readw(a) apecs_readw((unsigned long)(a)) +#define __readl(a) apecs_readl((unsigned long)(a)) +#define __readq(a) apecs_readq((unsigned long)(a)) +#define __writeb(x,a) apecs_writeb((x),(unsigned long)(a)) +#define __writew(x,a) apecs_writew((x),(unsigned long)(a)) +#define __writel(x,a) 
apecs_writel((x),(unsigned long)(a)) +#define __writeq(x,a) apecs_writeq((x),(unsigned long)(a)) +#define __ioremap(a) apecs_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) apecs_is_ioaddr((unsigned long)(a)) + +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/core_cia.h b/include/asm-alpha/core_cia.h index abd667ac7867..83648c8773a5 100644 --- a/include/asm-alpha/core_cia.h +++ b/include/asm-alpha/core_cia.h @@ -533,62 +533,58 @@ __EXTERN_INLINE int cia_is_ioaddr(unsigned long addr) #ifdef __WANT_IO_DEF #ifdef CONFIG_ALPHA_PYXIS -# define __inb cia_bwx_inb -# define __inw cia_bwx_inw -# define __inl cia_bwx_inl -# define __outb cia_bwx_outb -# define __outw cia_bwx_outw -# define __outl cia_bwx_outl -# define __readb cia_bwx_readb -# define __readw cia_bwx_readw -# define __writeb cia_bwx_writeb -# define __writew cia_bwx_writew -# define __readl cia_bwx_readl -# define __readq cia_bwx_readq -# define __writel cia_bwx_writel -# define __writeq cia_bwx_writeq -# define __ioremap cia_bwx_ioremap -# define inb(port) __inb((port)) -# define inw(port) __inw((port)) -# define inl(port) __inl((port)) -# define outb(x, port) __outb((x),(port)) -# define outw(x, port) __outw((x),(port)) -# define outl(x, port) __outl((x),(port)) -# define __raw_readb(addr) __readb((addr)) -# define __raw_readw(addr) __readw((addr)) -# define __raw_writeb(b, addr) __writeb((b),(addr)) -# define __raw_writew(b, addr) __writew((b),(addr)) -# define __raw_readl(a) __readl((unsigned long)(a)) -# define __raw_readq(a) __readq((unsigned long)(a)) -# define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -# define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +# define __inb(p) cia_bwx_inb((unsigned long)(p)) +# define __inw(p) cia_bwx_inw((unsigned long)(p)) +# define __inl(p) cia_bwx_inl((unsigned long)(p)) +# define 
__outb(x,p) cia_bwx_outb((x),(unsigned long)(p)) +# define __outw(x,p) cia_bwx_outw((x),(unsigned long)(p)) +# define __outl(x,p) cia_bwx_outl((x),(unsigned long)(p)) +# define __readb(a) cia_bwx_readb((unsigned long)(a)) +# define __readw(a) cia_bwx_readw((unsigned long)(a)) +# define __readl(a) cia_bwx_readl((unsigned long)(a)) +# define __readq(a) cia_bwx_readq((unsigned long)(a)) +# define __writeb(x,a) cia_bwx_writeb((x),(unsigned long)(a)) +# define __writew(x,a) cia_bwx_writew((x),(unsigned long)(a)) +# define __writel(x,a) cia_bwx_writel((x),(unsigned long)(a)) +# define __writeq(x,a) cia_bwx_writeq((x),(unsigned long)(a)) +# define __ioremap(a) cia_bwx_ioremap((unsigned long)(a)) +# define inb(p) __inb(p) +# define inw(p) __inw(p) +# define inl(p) __inl(p) +# define outb(x,p) __outb((x),(port)) +# define outw(x,p) __outw((x),(port)) +# define outl(x,p) __outl((x),(port)) +# define __raw_readb(a) __readb(a) +# define __raw_readw(a) __readw(a) +# define __raw_readl(a) __readl(a) +# define __raw_readq(a) __readq(a) +# define __raw_writeb(x,a) __writeb((x),(a)) +# define __raw_writew(x,a) __writew((x),(a)) +# define __raw_writel(x,a) __writel((x),(a)) +# define __raw_writeq(x,a) __writeq((x),(a)) #else -# define __inb cia_inb -# define __inw cia_inw -# define __inl cia_inl -# define __outb cia_outb -# define __outw cia_outw -# define __outl cia_outl -# define __readb cia_readb -# define __readw cia_readw -# define __writeb cia_writeb -# define __writew cia_writew -# define __readl cia_readl -# define __readq cia_readq -# define __writel cia_writel -# define __writeq cia_writeq -# define __ioremap cia_ioremap -# define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -# define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) -# define __raw_readl(a) __readl((unsigned long)(a)) -# define __raw_readq(a) __readq((unsigned long)(a)) -# define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -# define 
__raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +# define __inb(p) cia_inb((unsigned long)(p)) +# define __inw(p) cia_inw((unsigned long)(p)) +# define __inl(p) cia_inl((unsigned long)(p)) +# define __outb(x,p) cia_outb((x),(unsigned long)(p)) +# define __outw(x,p) cia_outw((x),(unsigned long)(p)) +# define __outl(x,p) cia_outl((x),(unsigned long)(p)) +# define __readb(a) cia_readb((unsigned long)(a)) +# define __readw(a) cia_readw((unsigned long)(a)) +# define __readl(a) cia_readl((unsigned long)(a)) +# define __readq(a) cia_readq((unsigned long)(a)) +# define __writeb(x,a) cia_writeb((x),(unsigned long)(a)) +# define __writew(x,a) cia_writew((x),(unsigned long)(a)) +# define __writel(x,a) cia_writel((x),(unsigned long)(a)) +# define __writeq(x,a) cia_writeq((x),(unsigned long)(a)) +# define __ioremap(a) cia_ioremap((unsigned long)(a)) +# define __raw_readl(a) __readl(a) +# define __raw_readq(a) __readq(a) +# define __raw_writel(v,a) __writel((v),(a)) +# define __raw_writeq(v,a) __writeq((v),(a)) #endif /* PYXIS */ -#define __is_ioaddr cia_is_ioaddr +#define __is_ioaddr(a) cia_is_ioaddr((unsigned long)(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/core_irongate.h b/include/asm-alpha/core_irongate.h index 5c30feb37569..c97283d89e49 100644 --- a/include/asm-alpha/core_irongate.h +++ b/include/asm-alpha/core_irongate.h @@ -493,38 +493,37 @@ __EXTERN_INLINE int irongate_is_ioaddr(unsigned long addr) #ifdef __WANT_IO_DEF -#define __inb irongate_inb -#define __inw irongate_inw -#define __inl irongate_inl -#define __outb irongate_outb -#define __outw irongate_outw -#define __outl irongate_outl -#define __readb irongate_readb -#define __readw irongate_readw -#define __writeb irongate_writeb -#define __writew irongate_writew -#define __readl irongate_readl -#define __readq irongate_readq -#define __writel irongate_writel -#define __writeq irongate_writeq -#define __ioremap irongate_ioremap -#define __is_ioaddr irongate_is_ioaddr - -#define inb(port) 
__inb((port)) -#define inw(port) __inw((port)) -#define inl(port) __inl((port)) -#define outb(v, port) __outb((v),(port)) -#define outw(v, port) __outw((v),(port)) -#define outl(v, port) __outl((v),(port)) - -#define __raw_readb(a) __readb((unsigned long)(a)) -#define __raw_readw(a) __readw((unsigned long)(a)) -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writeb(v,a) __writeb((v),(unsigned long)(a)) -#define __raw_writew(v,a) __writew((v),(unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __inb(p) irongate_inb((unsigned long)(p)) +#define __inw(p) irongate_inw((unsigned long)(p)) +#define __inl(p) irongate_inl((unsigned long)(p)) +#define __outb(x,p) irongate_outb((x),(unsigned long)(p)) +#define __outw(x,p) irongate_outw((x),(unsigned long)(p)) +#define __outl(x,p) irongate_outl((x),(unsigned long)(p)) +#define __readb(a) irongate_readb((unsigned long)(a)) +#define __readw(a) irongate_readw((unsigned long)(a)) +#define __readl(a) irongate_readl((unsigned long)(a)) +#define __readq(a) irongate_readq((unsigned long)(a)) +#define __writeb(x,a) irongate_writeb((x),(unsigned long)(a)) +#define __writew(x,a) irongate_writew((x),(unsigned long)(a)) +#define __writel(x,a) irongate_writel((x),(unsigned long)(a)) +#define __writeq(x,a) irongate_writeq((x),(unsigned long)(a)) +#define __ioremap(a) irongate_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) irongate_is_ioaddr((unsigned long)(a)) + +#define inb(p) __inb(p) +#define inw(p) __inw(p) +#define inl(p) __inl(p) +#define outb(x,p) __outb((x),(p)) +#define outw(x,p) __outw((x),(p)) +#define outl(x,p) __outl((x),(p)) +#define __raw_readb(a) __readb(a) +#define __raw_readw(a) __readw(a) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writeb(v,a) __writeb((v),(a)) +#define __raw_writew(v,a) __writew((v),(a)) 
+#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/core_lca.h b/include/asm-alpha/core_lca.h index 6ba2bbd83513..28b8993085a9 100644 --- a/include/asm-alpha/core_lca.h +++ b/include/asm-alpha/core_lca.h @@ -360,32 +360,27 @@ __EXTERN_INLINE int lca_is_ioaddr(unsigned long addr) #ifdef __WANT_IO_DEF -#define __inb lca_inb -#define __inw lca_inw -#define __inl lca_inl -#define __outb lca_outb -#define __outw lca_outw -#define __outl lca_outl -#define __readb lca_readb -#define __readw lca_readw -#define __writeb lca_writeb -#define __writew lca_writew -#define __readl lca_readl -#define __readq lca_readq -#define __writel lca_writel -#define __writeq lca_writeq -#define __ioremap lca_ioremap -#define __is_ioaddr lca_is_ioaddr - -#define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -#define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) - -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __inb(p) lca_inb((unsigned long)(p)) +#define __inw(p) lca_inw((unsigned long)(p)) +#define __inl(p) lca_inl((unsigned long)(p)) +#define __outb(x,p) lca_outb((x),(unsigned long)(p)) +#define __outw(x,p) lca_outw((x),(unsigned long)(p)) +#define __outl(x,p) lca_outl((x),(unsigned long)(p)) +#define __readb(a) lca_readb((unsigned long)(a)) +#define __readw(a) lca_readw((unsigned long)(a)) +#define __readl(a) lca_readl((unsigned long)(a)) +#define __readq(a) lca_readq((unsigned long)(a)) +#define __writeb(x,a) lca_writeb((x),(unsigned long)(a)) +#define __writew(x,a) lca_writew((x),(unsigned long)(a)) +#define __writel(x,a) lca_writel((x),(unsigned long)(a)) +#define __writeq(x,a) lca_writeq((x),(unsigned long)(a)) +#define __ioremap(a) 
lca_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) lca_is_ioaddr((unsigned long)(a)) + +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/core_mcpcia.h b/include/asm-alpha/core_mcpcia.h index bc3e44267914..d35ad9190db0 100644 --- a/include/asm-alpha/core_mcpcia.h +++ b/include/asm-alpha/core_mcpcia.h @@ -438,32 +438,27 @@ __EXTERN_INLINE void mcpcia_writeq(unsigned long b, unsigned long addr) #ifdef __WANT_IO_DEF -#define __inb mcpcia_inb -#define __inw mcpcia_inw -#define __inl mcpcia_inl -#define __outb mcpcia_outb -#define __outw mcpcia_outw -#define __outl mcpcia_outl -#define __readb mcpcia_readb -#define __readw mcpcia_readw -#define __writeb mcpcia_writeb -#define __writew mcpcia_writew -#define __readl mcpcia_readl -#define __readq mcpcia_readq -#define __writel mcpcia_writel -#define __writeq mcpcia_writeq -#define __ioremap mcpcia_ioremap -#define __is_ioaddr mcpcia_is_ioaddr - -# define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -# define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) - -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __inb(p) mcpcia_inb((unsigned long)(p)) +#define __inw(p) mcpcia_inw((unsigned long)(p)) +#define __inl(p) mcpcia_inl((unsigned long)(p)) +#define __outb(x,p) mcpcia_outb((x),(unsigned long)(p)) +#define __outw(x,p) mcpcia_outw((x),(unsigned long)(p)) +#define __outl(x,p) mcpcia_outl((x),(unsigned long)(p)) +#define __readb(a) mcpcia_readb((unsigned long)(a)) +#define __readw(a) mcpcia_readw((unsigned long)(a)) +#define __readl(a) mcpcia_readl((unsigned long)(a)) +#define __readq(a) mcpcia_readq((unsigned 
long)(a)) +#define __writeb(x,a) mcpcia_writeb((x),(unsigned long)(a)) +#define __writew(x,a) mcpcia_writew((x),(unsigned long)(a)) +#define __writel(x,a) mcpcia_writel((x),(unsigned long)(a)) +#define __writeq(x,a) mcpcia_writeq((x),(unsigned long)(a)) +#define __ioremap(a) mcpcia_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) mcpcia_is_ioaddr((unsigned long)(a)) + +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/core_polaris.h b/include/asm-alpha/core_polaris.h index 626b24a22631..88a0573d2517 100644 --- a/include/asm-alpha/core_polaris.h +++ b/include/asm-alpha/core_polaris.h @@ -170,38 +170,37 @@ __EXTERN_INLINE int polaris_is_ioaddr(unsigned long addr) #ifdef __WANT_IO_DEF -#define __inb polaris_inb -#define __inw polaris_inw -#define __inl polaris_inl -#define __outb polaris_outb -#define __outw polaris_outw -#define __outl polaris_outl -#define __readb polaris_readb -#define __readw polaris_readw -#define __writeb polaris_writeb -#define __writew polaris_writew -#define __readl polaris_readl -#define __readq polaris_readq -#define __writel polaris_writel -#define __writeq polaris_writeq -#define __ioremap polaris_ioremap -#define __is_ioaddr polaris_is_ioaddr - -#define inb(port) __inb((port)) -#define inw(port) __inw((port)) -#define inl(port) __inl((port)) -#define outb(v, port) __outb((v),(port)) -#define outw(v, port) __outw((v),(port)) -#define outl(v, port) __outl((v),(port)) - -#define __raw_readb(a) __readb((unsigned long)(a)) -#define __raw_readw(a) __readw((unsigned long)(a)) -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writeb(v,a) __writeb((v),(unsigned long)(a)) -#define __raw_writew(v,a) __writew((v),(unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define 
__raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __inb(p) polaris_inb((unsigned long)(p)) +#define __inw(p) polaris_inw((unsigned long)(p)) +#define __inl(p) polaris_inl((unsigned long)(p)) +#define __outb(x,p) polaris_outb((x),(unsigned long)(p)) +#define __outw(x,p) polaris_outw((x),(unsigned long)(p)) +#define __outl(x,p) polaris_outl((x),(unsigned long)(p)) +#define __readb(a) polaris_readb((unsigned long)(a)) +#define __readw(a) polaris_readw((unsigned long)(a)) +#define __readl(a) polaris_readl((unsigned long)(a)) +#define __readq(a) polaris_readq((unsigned long)(a)) +#define __writeb(x,a) polaris_writeb((x),(unsigned long)(a)) +#define __writew(x,a) polaris_writew((x),(unsigned long)(a)) +#define __writel(x,a) polaris_writel((x),(unsigned long)(a)) +#define __writeq(x,a) polaris_writeq((x),(unsigned long)(a)) +#define __ioremap(a) polaris_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) polaris_is_ioaddr((unsigned long)(a)) + +#define inb(p) __inb(p) +#define inw(p) __inw(p) +#define inl(p) __inl(p) +#define outb(x,p) __outb((x),(p)) +#define outw(x,p) __outw((x),(p)) +#define outl(x,p) __outl((x),(p)) +#define __raw_readb(a) __readb(a) +#define __raw_readw(a) __readw(a) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writeb(v,a) __writeb((v),(a)) +#define __raw_writew(v,a) __writew((v),(a)) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/core_t2.h b/include/asm-alpha/core_t2.h index d11506cd51d9..4eab7fa6404f 100644 --- a/include/asm-alpha/core_t2.h +++ b/include/asm-alpha/core_t2.h @@ -516,27 +516,22 @@ __EXTERN_INLINE int t2_is_ioaddr(unsigned long addr) #ifdef __WANT_IO_DEF -#define __inb t2_inb -#define __inw t2_inw -#define __inl t2_inl -#define __outb t2_outb -#define __outw t2_outw -#define __outl t2_outl -#define __readb t2_readb -#define __readw t2_readw -#define __readl t2_readl -#define __readq 
t2_readq -#define __writeb t2_writeb -#define __writew t2_writew -#define __writel t2_writel -#define __writeq t2_writeq -#define __ioremap t2_ioremap -#define __is_ioaddr t2_is_ioaddr - -#define inb(port) \ - (__builtin_constant_p((port))?__inb(port):_inb(port)) -#define outb(x, port) \ - (__builtin_constant_p((port))?__outb((x),(port)):_outb((x),(port))) +#define __inb(p) t2_inb((unsigned long)(p)) +#define __inw(p) t2_inw((unsigned long)(p)) +#define __inl(p) t2_inl((unsigned long)(p)) +#define __outb(x,p) t2_outb((x),(unsigned long)(p)) +#define __outw(x,p) t2_outw((x),(unsigned long)(p)) +#define __outl(x,p) t2_outl((x),(unsigned long)(p)) +#define __readb(a) t2_readb((unsigned long)(a)) +#define __readw(a) t2_readw((unsigned long)(a)) +#define __readl(a) t2_readl((unsigned long)(a)) +#define __readq(a) t2_readq((unsigned long)(a)) +#define __writeb(x,a) t2_writeb((x),(unsigned long)(a)) +#define __writew(x,a) t2_writew((x),(unsigned long)(a)) +#define __writel(x,a) t2_writel((x),(unsigned long)(a)) +#define __writeq(x,a) t2_writeq((x),(unsigned long)(a)) +#define __ioremap(a) t2_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) t2_is_ioaddr((unsigned long)(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/core_tsunami.h b/include/asm-alpha/core_tsunami.h index ffbcc5c34882..5d0d7cf7a36c 100644 --- a/include/asm-alpha/core_tsunami.h +++ b/include/asm-alpha/core_tsunami.h @@ -405,38 +405,37 @@ __EXTERN_INLINE void tsunami_writeq(unsigned long b, unsigned long addr) #ifdef __WANT_IO_DEF -#define __inb tsunami_inb -#define __inw tsunami_inw -#define __inl tsunami_inl -#define __outb tsunami_outb -#define __outw tsunami_outw -#define __outl tsunami_outl -#define __readb tsunami_readb -#define __readw tsunami_readw -#define __writeb tsunami_writeb -#define __writew tsunami_writew -#define __readl tsunami_readl -#define __readq tsunami_readq -#define __writel tsunami_writel -#define __writeq tsunami_writeq -#define __ioremap tsunami_ioremap -#define 
__is_ioaddr tsunami_is_ioaddr - -#define inb(port) __inb((port)) -#define inw(port) __inw((port)) -#define inl(port) __inl((port)) -#define outb(v, port) __outb((v),(port)) -#define outw(v, port) __outw((v),(port)) -#define outl(v, port) __outl((v),(port)) - -#define __raw_readb(a) __readb((unsigned long)(a)) -#define __raw_readw(a) __readw((unsigned long)(a)) -#define __raw_readl(a) __readl((unsigned long)(a)) -#define __raw_readq(a) __readq((unsigned long)(a)) -#define __raw_writeb(v,a) __writeb((v),(unsigned long)(a)) -#define __raw_writew(v,a) __writew((v),(unsigned long)(a)) -#define __raw_writel(v,a) __writel((v),(unsigned long)(a)) -#define __raw_writeq(v,a) __writeq((v),(unsigned long)(a)) +#define __inb(p) tsunami_inb((unsigned long)(p)) +#define __inw(p) tsunami_inw((unsigned long)(p)) +#define __inl(p) tsunami_inl((unsigned long)(p)) +#define __outb(x,p) tsunami_outb((x),(unsigned long)(p)) +#define __outw(x,p) tsunami_outw((x),(unsigned long)(p)) +#define __outl(x,p) tsunami_outl((x),(unsigned long)(p)) +#define __readb(a) tsunami_readb((unsigned long)(a)) +#define __readw(a) tsunami_readw((unsigned long)(a)) +#define __readl(a) tsunami_readl((unsigned long)(a)) +#define __readq(a) tsunami_readq((unsigned long)(a)) +#define __writeb(x,a) tsunami_writeb((x),(unsigned long)(a)) +#define __writew(x,a) tsunami_writew((x),(unsigned long)(a)) +#define __writel(x,a) tsunami_writel((x),(unsigned long)(a)) +#define __writeq(x,a) tsunami_writeq((x),(unsigned long)(a)) +#define __ioremap(a) tsunami_ioremap((unsigned long)(a)) +#define __is_ioaddr(a) tsunami_is_ioaddr((unsigned long)(a)) + +#define inb(p) __inb(p) +#define inw(p) __inw(p) +#define inl(p) __inl(p) +#define outb(x,p) __outb((x),(p)) +#define outw(x,p) __outw((x),(p)) +#define outl(x,p) __outl((x),(p)) +#define __raw_readb(a) __readb(a) +#define __raw_readw(a) __readw(a) +#define __raw_readl(a) __readl(a) +#define __raw_readq(a) __readq(a) +#define __raw_writeb(v,a) __writeb((v),(a)) +#define 
__raw_writew(v,a) __writew((v),(a)) +#define __raw_writel(v,a) __writel((v),(a)) +#define __raw_writeq(v,a) __writeq((v),(a)) #endif /* __WANT_IO_DEF */ diff --git a/include/asm-alpha/delay.h b/include/asm-alpha/delay.h index b6789cb359a3..84b6d1eb5172 100644 --- a/include/asm-alpha/delay.h +++ b/include/asm-alpha/delay.h @@ -31,21 +31,16 @@ __delay(int loops) } extern __inline__ void -udelay(unsigned long usecs) +__udelay(unsigned long usecs, unsigned long lps) { - unsigned long lps; + usecs *= ((1UL << 32) / 1000000) * lps; + __delay((long)usecs >> 32); +} #ifdef __SMP__ - lps = cpu_data[smp_processor_id()].loops_per_sec; +#define udelay(u) __udelay((u), cpu_data[smp_processor_id()].loops_per_sec) #else - lps = loops_per_sec; +#define udelay(u) __udelay((u), loops_per_sec) #endif - /* Compute (usecs * 2**32 / 10**6) * loops_per_sec / 2**32. */ - - usecs *= 0x10c6; /* 2^32 / 10^6 */ - usecs *= lps; - __delay((long)usecs >> 32); -} - #endif /* defined(__ALPHA_DELAY_H) */ diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h index 3d5a3451a8ed..0e532f6e5581 100644 --- a/include/asm-alpha/io.h +++ b/include/asm-alpha/io.h @@ -112,12 +112,12 @@ extern void _sethae (unsigned long addr); /* cached version */ /* In a generic kernel, we always go through the machine vector. 
*/ -# define __inb alpha_mv.mv_inb -# define __inw alpha_mv.mv_inw -# define __inl alpha_mv.mv_inl -# define __outb alpha_mv.mv_outb -# define __outw alpha_mv.mv_outw -# define __outl alpha_mv.mv_outl +# define __inb(p) alpha_mv.mv_inb((unsigned long)(p)) +# define __inw(p) alpha_mv.mv_inw((unsigned long)(p)) +# define __inl(p) alpha_mv.mv_inl((unsigned long)(p)) +# define __outb(x,p) alpha_mv.mv_outb((x),(unsigned long)(p)) +# define __outw(x,p) alpha_mv.mv_outw((x),(unsigned long)(p)) +# define __outl(x,p) alpha_mv.mv_outl((x),(unsigned long)(p)) # define __readb(a) alpha_mv.mv_readb((unsigned long)(a)) # define __readw(a) alpha_mv.mv_readw((unsigned long)(a)) @@ -128,8 +128,8 @@ extern void _sethae (unsigned long addr); /* cached version */ # define __writel(v,a) alpha_mv.mv_writel((v),(unsigned long)(a)) # define __writeq(v,a) alpha_mv.mv_writeq((v),(unsigned long)(a)) -# define __ioremap(a) alpha_mv.mv_ioremap(a) -# define __is_ioaddr(a) alpha_mv.mv_is_ioaddr(a) +# define __ioremap(a) alpha_mv.mv_ioremap((unsigned long)(a)) +# define __is_ioaddr(a) alpha_mv.mv_is_ioaddr((unsigned long)(a)) # define inb __inb # define inw __inw @@ -210,13 +210,13 @@ extern void _writeq(unsigned long b, unsigned long addr); * redefined by userlevel programs. */ #ifndef inb -# define inb(p) _inb((p)) +# define inb(p) _inb(p) #endif #ifndef inw -# define inw(p) _inw((p)) +# define inw(p) _inw(p) #endif #ifndef inl -# define inl(p) _inl((p)) +# define inl(p) _inl(p) #endif #ifndef outb # define outb(b,p) _outb((b),(p)) @@ -254,12 +254,12 @@ extern void _writeq(unsigned long b, unsigned long addr); /* Userspace declarations. 
*/ -extern unsigned int inb (unsigned long port); -extern unsigned int inw (unsigned long port); -extern unsigned int inl (unsigned long port); -extern void outb (unsigned char b,unsigned long port); -extern void outw (unsigned short w,unsigned long port); -extern void outl (unsigned int l,unsigned long port); +extern unsigned int inb(unsigned long port); +extern unsigned int inw(unsigned long port); +extern unsigned int inl(unsigned long port); +extern void outb(unsigned char b,unsigned long port); +extern void outw(unsigned short w,unsigned long port); +extern void outl(unsigned int l,unsigned long port); extern unsigned long readb(unsigned long addr); extern unsigned long readw(unsigned long addr); extern unsigned long readl(unsigned long addr); diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index 6b268f4e6df7..c42c1ae54dc4 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -22,11 +22,6 @@ #include #endif -static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ -} - extern inline unsigned long __reload_thread(struct thread_struct *pcb) { @@ -223,6 +218,12 @@ destroy_context(struct mm_struct *mm) /* Nothing to do. 
*/ } +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ + tsk->thread.ptbr = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; +} + #ifdef __MMU_EXTERN_INLINE #undef __EXTERN_INLINE #undef __MMU_EXTERN_INLINE diff --git a/include/asm-alpha/smp.h b/include/asm-alpha/smp.h index 12ebc5087b36..b05076263482 100644 --- a/include/asm-alpha/smp.h +++ b/include/asm-alpha/smp.h @@ -30,7 +30,6 @@ struct cpuinfo_alpha { unsigned long pgtable_cache_sz; unsigned long ipi_count; unsigned long irq_attempt[NR_IRQS]; - unsigned long smp_local_irq_count; unsigned long prof_multiplier; unsigned long prof_counter; int irq_count, bh_count; diff --git a/include/asm-alpha/vga.h b/include/asm-alpha/vga.h index 44b6abcc0df2..4fc557ca1c41 100644 --- a/include/asm-alpha/vga.h +++ b/include/asm-alpha/vga.h @@ -14,7 +14,7 @@ #define VT_BUF_HAVE_MEMCPYW #define VT_BUF_HAVE_MEMCPYF -extern inline void scr_writew(u16 val, u16 *addr) +extern inline void scr_writew(u16 val, volatile u16 *addr) { if (__is_ioaddr((unsigned long) addr)) __raw_writew(val, (unsigned long) addr); @@ -22,7 +22,7 @@ extern inline void scr_writew(u16 val, u16 *addr) *addr = val; } -extern inline u16 scr_readw(const u16 *addr) +extern inline u16 scr_readw(volatile const u16 *addr) { if (__is_ioaddr((unsigned long) addr)) return __raw_readw((unsigned long) addr); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 1a4c151cdcdd..9f594ab5d826 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -116,6 +116,10 @@ extern struct cpuinfo_x86 cpu_data[]; (boot_cpu_data.x86_capability & X86_FEATURE_PAE) #define cpu_has_tsc \ (boot_cpu_data.x86_capability & X86_FEATURE_TSC) +#define cpu_has_de \ + (boot_cpu_data.x86_capability & X86_FEATURE_DE) +#define cpu_has_vme \ + (boot_cpu_data.x86_capability & X86_FEATURE_VME) extern char ignore_irq13; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 
31721c101d85..14e163b0ebb3 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -37,7 +37,7 @@ struct linux_binfmt { struct linux_binfmt * next; struct module *module; int (*load_binary)(struct linux_binprm *, struct pt_regs * regs); - int (*load_shlib)(int fd); + int (*load_shlib)(struct file *); int (*core_dump)(long signr, struct pt_regs * regs, struct file * file); unsigned long min_coredump; /* minimal dump size */ }; diff --git a/include/linux/fs.h b/include/linux/fs.h index cebc71f32a77..893cc57c8f24 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -995,6 +995,7 @@ typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, u extern struct dentry * lookup_dentry(const char *, struct dentry *, unsigned int); +extern struct dentry * lookup_one(const char *, struct dentry *); extern struct dentry * __namei(const char *, unsigned int); #define namei(pathname) __namei(pathname, 1) diff --git a/include/linux/icmp.h b/include/linux/icmp.h index fb4ed8b9a3ba..292888923b52 100644 --- a/include/linux/icmp.h +++ b/include/linux/icmp.h @@ -82,10 +82,31 @@ struct icmphdr { #ifdef __KERNEL__ +#include + struct icmp_err { int errno; unsigned fatal:1; }; + +/* + * Build xmit assembly blocks + */ + +struct icmp_bxm +{ + void *data_ptr; + int data_len; + struct icmphdr icmph; + unsigned long csum; + struct ip_options replyopts; + unsigned char optbuf[40]; +}; + +struct sk_buff; + +extern void icmp_reply(struct icmp_bxm *, struct sk_buff *); + #endif /* diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9b72c14de63e..c18810e526c4 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -16,7 +16,8 @@ #define NF_ACCEPT 1 #define NF_STOLEN 2 #define NF_QUEUE 3 -#define NF_MAX_VERDICT NF_QUEUE +#define NF_REPEAT 4 +#define NF_MAX_VERDICT NF_REPEAT /* Generic cache responses from hook functions. 
*/ #define NFC_ALTERED 0x8000 @@ -66,6 +67,10 @@ struct nf_sockopt_ops int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void *user, int *len); + + /* Number of users inside set() or get(). */ + unsigned int use; + struct task_struct *cleanup_task; }; /* Each queued (to userspace) skbuff has one of these. */ @@ -173,12 +178,4 @@ extern void nf_invalidate_cache(int pf); #define SUMIN(a,b) ((size_t)(a)<(size_t)(b) ? (ssize_t)(a) : (ssize_t)(b)) #endif /*__KERNEL__*/ -enum nf_reason { - /* Do not, NOT, reorder these. Add at end. */ - NF_REASON_NONE, - NF_REASON_SET_BY_IPCHAINS, - NF_REASON_FOR_ROUTING, - NF_REASON_FOR_CLS_FW, -}; - #endif /*__LINUX_NETFILTER_H*/ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 6111c47d80d6..3a41356db539 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -49,6 +49,16 @@ #define NF_IP_POST_ROUTING 4 #define NF_IP_NUMHOOKS 5 +enum nf_ip_hook_priorities { + NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_CONNTRACK = -200, + NF_IP_PRI_MANGLE = -150, + NF_IP_PRI_NAT_DST = -100, + NF_IP_PRI_FILTER = 0, + NF_IP_PRI_NAT_SRC = 100, + NF_IP_PRI_LAST = INT_MAX, +}; + #ifdef CONFIG_NETFILTER_DEBUG #ifdef __KERNEL__ void nf_debug_ip_local_deliver(struct sk_buff *skb); diff --git a/include/linux/netfilter_ipv4/compat_firewall.h b/include/linux/netfilter_ipv4/compat_firewall.h new file mode 100644 index 000000000000..b8c419c51d03 --- /dev/null +++ b/include/linux/netfilter_ipv4/compat_firewall.h @@ -0,0 +1,47 @@ +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +#ifndef __LINUX_FIREWALL_H +#define __LINUX_FIREWALL_H + +#include + +/* + * Definitions for loadable firewall modules + */ + +#define FW_QUEUE 0 +#define FW_BLOCK 1 +#define FW_ACCEPT 2 +#define FW_REJECT (-1) +#define FW_REDIRECT 3 +#define FW_MASQUERADE 4 +#define FW_SKIP 5 + +struct firewall_ops +{ + struct firewall_ops *next; + int (*fw_forward)(struct 
firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + int (*fw_input)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + int (*fw_output)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + /* These may be NULL. */ + int (*fw_acct_in)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); + int (*fw_acct_out)(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb); +}; + +extern int register_firewall(int pf, struct firewall_ops *fw); +extern int unregister_firewall(int pf, struct firewall_ops *fw); + +extern int ip_fw_masq_timeouts(void *user, int len); +#endif /* __LINUX_FIREWALL_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h new file mode 100644 index 000000000000..ff3bd30d4da3 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -0,0 +1,176 @@ +#ifndef _IP_CONNTRACK_H +#define _IP_CONNTRACK_H +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +#include + +enum ip_conntrack_info +{ + /* Part of an established connection (either direction). */ + IP_CT_ESTABLISHED, + + /* Like NEW, but related to an existing connection, or ICMP error + (in either direction). */ + IP_CT_RELATED, + + /* Started a new connection to track (only + IP_CT_DIR_ORIGINAL); may be a retransmission. */ + IP_CT_NEW, + + /* >= this indicates reply direction */ + IP_CT_IS_REPLY, + + /* Number of distinct IP_CT types (no NEW in reply dirn). */ + IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 +}; + +#ifdef __KERNEL__ + +#include +#include + +#ifdef CONFIG_NF_DEBUG +#define IP_NF_ASSERT(x) \ +do { \ + if (!(x)) \ + /* Wooah! 
I'm tripping my conntrack in a frenzy of \ + netplay... */ \ + printk("NF_IP_ASSERT: %s:%i(%s)\n", \ + __FILE__, __LINE__, __FUNCTION__); \ +} while(0) +#else +#define IP_NF_ASSERT(x) +#endif + +/* Bitset representing status of connection. */ +enum ip_conntrack_status { + /* It's an expected connection: bit 0 set. This bit never changed */ + IPS_EXPECTED = 0x01, + + /* We've seen packets both ways: bit 1 set. Can be set, not unset. */ + IPS_SEEN_REPLY = 0x02 +}; + +struct ip_conntrack_expect +{ + /* Internal linked list */ + struct list_head list; + + /* We expect this tuple, but DON'T CARE ABOUT THE SOURCE + per-protocol part. */ + struct ip_conntrack_tuple tuple; + + /* The conntrack we are part of (set iff we're live) */ + struct ip_conntrack *expectant; +}; + +#if defined(CONFIG_IP_NF_NAT) || defined(CONFIG_IP_NF_NAT_MODULE) +#include +#endif + +#if defined(CONFIG_IP_NF_FTP) || defined(CONFIG_IP_NF_FTP_MODULE) +#include +#if defined(CONFIG_IP_NF_NAT) || defined(CONFIG_IP_NF_NAT_MODULE) +#include +#endif +#endif + +struct ip_conntrack +{ + /* Usage count in here is 1 for destruct timer, 1 per skb, + plus 1 for any connection(s) we are `master' for */ + struct nf_conntrack ct_general; + + /* These are my tuples; original and reply */ + struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; + + /* Have we seen traffic both ways yet? (bitset) */ + unsigned int status; + + /* Timer function; drops refcnt when it goes off. */ + struct timer_list timeout; + + /* If we're expecting another related connection, this will be + in expected linked list */ + struct ip_conntrack_expect expected; + + /* If we were expected by another connection, this will be it */ + struct nf_ct_info master; + + /* Helper, if any. 
*/ + struct ip_conntrack_helper *helper; + + /* Our various nf_ct_info structs specify *what* relation this + packet has to the conntrack */ + struct nf_ct_info infos[IP_CT_NUMBER]; + + /* Storage reserved for other modules: */ + + union { + int /*enum tcp_conntrack*/ tcp_state; + } proto; + + union { +#if defined(CONFIG_IP_NF_FTP) || defined(CONFIG_IP_NF_FTP_MODULE) + struct ip_ct_ftp ct_ftp_info; +#endif + } help; + +#if defined(CONFIG_IP_NF_NAT) || defined(CONFIG_IP_NF_NAT_MODULE) + struct { + struct ip_nat_info info; + union { +#if defined(CONFIG_IP_NF_FTP) || defined(CONFIG_IP_NF_FTP_MODULE) + struct ip_nat_ftp_info ftp_info[IP_CT_DIR_MAX]; +#endif + } help; +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + int masq_index; +#endif + } nat; +#endif /* CONFIG_IP_NF_NAT || CONFIG_IP_NF_NAT_MODULE */ + +}; + +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. */ +extern int +ip_conntrack_alter_reply(struct ip_conntrack *conntrack, + const struct ip_conntrack_tuple *newreply); + +/* Is this tuple taken? (ignoring any belonging to the given + conntrack). */ +extern int +ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +/* Return conntrack_info and tuple hash for given skb. */ +extern struct ip_conntrack * +ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo); + +extern struct module *ip_conntrack_module; + +extern int invert_tuplepr(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig); + +/* Refresh conntrack for this many jiffies */ +extern void ip_ct_refresh(struct ip_conntrack *ct, + unsigned long extra_jiffies); + +/* These are for NAT. Icky. */ +/* Call me when a conntrack is destroyed. 
*/ +extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack); + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +ip_ct_gather_frags(struct sk_buff *skb); + +/* Delete all conntracks which match. */ +extern void +ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), + void *data); +#endif /* __KERNEL__ */ +#endif /* _IP_CONNTRACK_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h new file mode 100644 index 000000000000..9cb49afaff88 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -0,0 +1,39 @@ +#ifndef _IP_CONNTRACK_CORE_H +#define _IP_CONNTRACK_CORE_H +#include + +/* This header is used to share core functionality between the + standalone connection tracking module, and the compatibility layer's use + of connection tracking. */ +extern unsigned int ip_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + +extern int ip_conntrack_init(void); +extern void ip_conntrack_cleanup(void); + +struct ip_conntrack_protocol; +extern struct ip_conntrack_protocol *find_proto(u_int8_t protocol); +/* Like above, but you already have conntrack read lock. */ +extern struct ip_conntrack_protocol *__find_proto(u_int8_t protocol); +extern struct list_head protocol_list; + +/* Returns TRUE if it dealt with ICMP, and filled in skb->nfct */ +int icmp_error_track(struct sk_buff *skb); +extern int get_tuple(const struct iphdr *iph, size_t len, + struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol); + +/* Find a connection corresponding to a tuple. 
*/ +struct ip_conntrack_tuple_hash * +ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +extern unsigned int ip_conntrack_htable_size; +extern struct list_head *ip_conntrack_hash; +extern struct list_head expect_list; +DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); +#endif /* _IP_CONNTRACK_CORE_H */ + diff --git a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h new file mode 100644 index 000000000000..a164aed4f7af --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h @@ -0,0 +1,41 @@ +#ifndef _IP_CONNTRACK_FTP_H +#define _IP_CONNTRACK_FTP_H +/* FTP tracking. */ + +#ifndef __KERNEL__ +#error Only in kernel. +#endif + +#include + +/* Protects ftp part of conntracks */ +DECLARE_LOCK_EXTERN(ip_ftp_lock); + +enum ip_ct_ftp_type +{ + /* PORT command from client */ + IP_CT_FTP_PORT = IP_CT_DIR_ORIGINAL, + /* PASV response from server */ + IP_CT_FTP_PASV = IP_CT_DIR_REPLY +}; + +/* Protected by ip_conntrack_lock */ +/* We record seq number and length of ftp ip/port text here: all in + host order. */ +struct ip_ct_ftp +{ + /* This tells NAT that this is an ftp connection */ + int is_ftp; + u_int32_t seq; + /* 0 means not found yet */ + u_int32_t len; + enum ip_ct_ftp_type ftptype; + /* Port that was to be used */ + u_int16_t port; + /* Next valid seq position for cmd matching after newline */ + u_int32_t seq_aft_nl[IP_CT_DIR_MAX]; + /* 0 means seq_match_aft_nl not set */ + int seq_aft_nl_set[IP_CT_DIR_MAX]; +}; + +#endif /* _IP_CONNTRACK_FTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h new file mode 100644 index 000000000000..006cedef5a88 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -0,0 +1,30 @@ +/* IP connection tracking helpers. 
*/ +#ifndef _IP_CONNTRACK_HELPER_H +#define _IP_CONNTRACK_HELPER_H +#include + +struct module; + +struct ip_conntrack_helper +{ + /* Internal use. */ + struct list_head list; + + /* Returns TRUE if it wants to help this connection (tuple is + the tuple of REPLY packets from server). */ + int (*will_help)(const struct ip_conntrack_tuple *rtuple); + + /* Function to call when data passes; return verdict, or -1 to + invalidate. */ + int (*help)(const struct iphdr *, size_t len, + struct ip_conntrack *ct, + enum ip_conntrack_info conntrackinfo); +}; + +extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); +extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *); + +/* Add an expected connection. */ +extern int ip_conntrack_expect_related(struct ip_conntrack *related_to, + const struct ip_conntrack_tuple *tuple); +#endif /*_IP_CONNTRACK_HELPER_H*/ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h new file mode 100644 index 000000000000..3c1b4a4c646b --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -0,0 +1,58 @@ +/* Header for use in defining a given protocol for connection tracking. */ +#ifndef _IP_CONNTRACK_PROTOCOL_H +#define _IP_CONNTRACK_PROTOCOL_H +#include + +struct ip_conntrack_protocol +{ + /* Next pointer. */ + struct list_head list; + + /* Protocol number. */ + u_int8_t proto; + + /* Protocol name */ + const char *name; + + /* Try to fill in the third arg; return true if possible. */ + int (*pkt_to_tuple)(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple); + + /* Invert the per-proto part of the tuple: ie. turn xmit into reply. + * Some packets can't be inverted: return 0 in that case. + */ + int (*invert_tuple)(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig); + + /* Print out the per-protocol part of the tuple. 
*/ + unsigned int (*print_tuple)(char *buffer, + const struct ip_conntrack_tuple *); + + /* Print out the private part of the conntrack. */ + unsigned int (*print_conntrack)(char *buffer, + const struct ip_conntrack *); + + /* Returns verdict for packet, or -1 for invalid. */ + int (*packet)(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info ctinfo); + + /* Called when a new connection for this protocol found; returns + * TRUE if it's OK. If so, packet() called next. */ + int (*new)(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len); + + /* Module (if any) which this is connected to. */ + struct module *me; +}; + +/* Protocol registration. */ +extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); +extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); + +/* Existing built-in protocols */ +extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; +extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; +extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp; +extern int ip_conntrack_protocol_tcp_init(void); +#endif /*_IP_CONNTRACK_PROTOCOL_H*/ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h new file mode 100644 index 000000000000..c0a845caf325 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -0,0 +1,105 @@ +#ifndef _IP_CONNTRACK_TUPLE_H +#define _IP_CONNTRACK_TUPLE_H + +/* A `tuple' is a structure containing the information to uniquely + identify a connection. ie. if two packets have the same tuple, they + are in the same connection; if not, they are not. + + We divide the structure along "manipulatable" and + "non-manipulatable" lines, for the benefit of the NAT code. +*/ + +/* The protocol-specific manipulable parts of the tuple. */ +union ip_conntrack_manip_proto +{ + /* Add other protocols here. 
*/ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int16_t id; + } icmp; +}; + +/* The manipulable part of the tuple. */ +struct ip_conntrack_manip +{ + u_int32_t ip; + union ip_conntrack_manip_proto u; + u_int16_t pad; /* Must be set to 0 for memcmp. */ +}; + +/* This contains the information to distinguish a connection. */ +struct ip_conntrack_tuple +{ + struct ip_conntrack_manip src; + + /* These are the parts of the tuple which are fixed. */ + struct { + u_int32_t ip; + union { + /* Add other protocols here. */ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int8_t type, code; + } icmp; + } u; + + /* The protocol. */ + u_int16_t protonum; + } dst; +}; + +#define IP_PARTS_NATIVE(n) \ +(unsigned int)((n)>>24)&0xFF, \ +(unsigned int)((n)>>16)&0xFF, \ +(unsigned int)((n)>>8)&0xFF, \ +(unsigned int)((n)&0xFF) + +#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n)) + +#ifdef __KERNEL__ + +#define DUMP_TUPLE(tp) \ +DEBUGP("tuple %p: %u %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", \ + (tp), (tp)->dst.protonum, \ + IP_PARTS((tp)->src.ip), ntohs((tp)->src.u.all), \ + IP_PARTS((tp)->dst.ip), ntohs((tp)->dst.u.all)) + +#define CTINFO2DIR(ctinfo) ((ctinfo) == IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL) + +/* If we're the first tuple, it's the original dir. */ +#define DIRECTION(h) ((enum ip_conntrack_dir)(&(h)->ctrack->tuplehash[1] == (h))) + +enum ip_conntrack_dir +{ + IP_CT_DIR_ORIGINAL, + IP_CT_DIR_REPLY, + IP_CT_DIR_MAX +}; + +/* Connections have two entries in the hash table: one for each way */ +struct ip_conntrack_tuple_hash +{ + struct list_head list; + + struct ip_conntrack_tuple tuple; + + /* this == &ctrack->tuplehash[DIRECTION(this)]. 
*/ + struct ip_conntrack *ctrack; +}; + +#endif /* __KERNEL__ */ +#endif /* _IP_CONNTRACK_TUPLE_H */ diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h new file mode 100644 index 000000000000..8eb37d6d5bf7 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -0,0 +1,117 @@ +#ifndef _IP_NAT_H +#define _IP_NAT_H +#include +#include + +#define IP_NAT_MAPPING_TYPE_MAX_NAMELEN 16 + +enum ip_nat_manip_type +{ + IP_NAT_MANIP_SRC, + IP_NAT_MANIP_DST +}; + +/* SRC manip occurs only on POST_ROUTING */ +#define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING) + +/* 2.3.19 (I hope) will define this in linux/netfilter_ipv4.h. */ +#ifndef SO_ORIGINAL_DST +#define SO_ORIGINAL_DST 80 +#endif + +#define IP_NAT_RANGE_MAP_IPS 1 +#define IP_NAT_RANGE_PROTO_SPECIFIED 2 +/* Used internally by get_unique_tuple(). */ +#define IP_NAT_RANGE_FULL 4 + +/* Single range specification. */ +struct ip_nat_range +{ + /* Set to OR of flags above. */ + unsigned int flags; + + /* Inclusive: network order. */ + u_int32_t min_ip, max_ip; + + /* Inclusive: network order */ + union ip_conntrack_manip_proto min, max; +}; + +/* A range consists of an array of 1 or more ip_nat_range */ +struct ip_nat_multi_range +{ + unsigned int rangesize; + + /* hangs off end. */ + struct ip_nat_range range[1]; +}; + +#ifdef __KERNEL__ +#include +#include + +/* Protects NAT hash tables, and NAT-private part of conntracks. */ +DECLARE_RWLOCK_EXTERN(ip_nat_lock); + +/* Hashes for by-source and IP/protocol. */ +struct ip_nat_hash +{ + struct list_head list; + + /* conntrack we're embedded in: NULL if not in hash. */ + struct ip_conntrack *conntrack; +}; + +/* Worst case: local-out manip + 1 post-routing, and reverse dirn. */ +#define IP_NAT_MAX_MANIPS (2*3) + +struct ip_nat_info_manip +{ + /* The direction. */ + u_int8_t direction; + + /* Which hook the manipulation happens on. */ + u_int8_t hooknum; + + /* The manipulation type. 
*/ + u_int8_t maniptype; + + /* Manipulations to occur at each conntrack in this dirn. */ + struct ip_conntrack_manip manip; +}; + +/* The structure embedded in the conntrack structure. */ +struct ip_nat_info +{ + /* Set to zero when conntrack created: bitmask of maniptypes */ + int initialized; + + unsigned int num_manips; + + /* Manipulations to be done on this conntrack. */ + struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS]; + + /* The mapping type which created us (NULL for null mapping). */ + const struct ip_nat_mapping_type *mtype; + + struct ip_nat_hash bysource, byipsproto; + + /* Helper (NULL if none). */ + struct ip_nat_helper *helper; +}; + +/* Set up the info structure to map into this range. */ +extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, + const struct ip_nat_multi_range *mr, + unsigned int hooknum); + +/* Is this tuple already taken? (not by us)*/ +extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +/* Calculate relative checksum. */ +extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, + u_int32_t newval, + u_int16_t oldcheck); +#endif /*__KERNEL__*/ +#endif diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h new file mode 100644 index 000000000000..28735e0c1d0e --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -0,0 +1,33 @@ +#ifndef _IP_NAT_CORE_H +#define _IP_NAT_CORE_H +#include +#include + +/* This header used to share core functionality between the standalone + NAT module, and the compatibility layer's use of NAT for masquerading. 
*/ +extern int ip_nat_init(void); +extern void ip_nat_cleanup(void); + +extern unsigned int do_bindings(struct ip_conntrack *ct, + enum ip_conntrack_info conntrackinfo, + struct ip_nat_info *info, + unsigned int hooknum, + struct sk_buff **pskb); + +extern struct list_head protos; + +extern void icmp_reply_translation(struct sk_buff *skb, + struct ip_conntrack *conntrack, + unsigned int hooknum, + int dir); + +extern void replace_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info); +extern void place_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info); + +/* Built-in protocols. */ +extern struct ip_nat_protocol ip_nat_protocol_tcp; +extern struct ip_nat_protocol ip_nat_protocol_udp; +extern struct ip_nat_protocol ip_nat_protocol_icmp; +#endif /* _IP_NAT_CORE_H */ diff --git a/include/linux/netfilter_ipv4/ip_nat_ftp.h b/include/linux/netfilter_ipv4/ip_nat_ftp.h new file mode 100644 index 000000000000..d84015529be5 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_ftp.h @@ -0,0 +1,21 @@ +#ifndef _IP_NAT_FTP_H +#define _IP_NAT_FTP_H +/* FTP extension for TCP NAT alteration. */ + +#ifndef __KERNEL__ +#error Only in kernel. +#endif + +/* Protects ftp part of conntracks */ +DECLARE_LOCK_EXTERN(ip_ftp_lock); + +/* We keep track of where the last SYN correction was, and the SYN + offsets before and after that correction. Two of these (indexed by + direction). */ +struct ip_nat_ftp_info +{ + u_int32_t syn_correction_pos; + int32_t syn_offset_before, syn_offset_after; +}; + +#endif /* _IP_NAT_FTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_nat_helper.h b/include/linux/netfilter_ipv4/ip_nat_helper.h new file mode 100644 index 000000000000..1578d6efca34 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_helper.h @@ -0,0 +1,30 @@ +#ifndef _IP_NAT_HELPER_H +#define _IP_NAT_HELPER_H +/* NAT protocol helper routines. 
*/ + +#include + +struct sk_buff; + +struct ip_nat_helper +{ + /* Internal use */ + struct list_head list; + + /* Here's the protocol and dst we care about. */ + u_int16_t protocol; + u_int16_t protocol_dst; + + /* Helper function: returns verdict */ + unsigned int (*help)(struct ip_conntrack *ct, + struct ip_nat_info *info, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb); + + const char *name; +}; + +extern int ip_nat_helper_register(struct ip_nat_helper *me); +extern void ip_nat_helper_unregister(struct ip_nat_helper *me); +#endif diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h new file mode 100644 index 000000000000..42e2ebf33997 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h @@ -0,0 +1,57 @@ +/* Header for use in defining a given protocol. */ +#ifndef _IP_NAT_PROTOCOL_H +#define _IP_NAT_PROTOCOL_H +#include +#include + +struct iphdr; +struct ip_nat_range; + +struct ip_nat_protocol +{ + struct list_head list; + + /* Protocol name */ + const char *name; + + /* Protocol number. */ + unsigned int protonum; + + /* Do a packet translation according to the ip_nat_proto_manip + * and manip type. */ + void (*manip_pkt)(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype); + + /* Is the manipable part of the tuple between min and max incl? */ + int (*in_range)(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max); + + /* Alter the per-proto part of the tuple (depending on + maniptype), to give a unique tuple in the given range if + possible; return false if not. Per-protocol part of tuple + is initialized to the incoming packet. 
*/ + int (*unique_tuple)(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack); + + unsigned int (*print)(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask); + + unsigned int (*print_range)(char *buffer, + const struct ip_nat_range *range); +}; + +/* Protocol registration. */ +extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); +extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); + +extern int init_protocols(void) __init; +extern void cleanup_protocols(void); +extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); + +#endif /*_IP_NAT_PROTO_H*/ diff --git a/include/linux/netfilter_ipv4/ip_nat_rule.h b/include/linux/netfilter_ipv4/ip_nat_rule.h new file mode 100644 index 000000000000..6c92b285d184 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_rule.h @@ -0,0 +1,35 @@ +#ifndef _IP_NAT_RULE_H +#define _IP_NAT_RULE_H +#include +#include +#include + +#ifdef __KERNEL__ +/* Want to be told when we first NAT an expected packet for a conntrack? 
*/ +struct ip_nat_expect +{ + struct list_head list; + + /* Returns 1 (and sets verdict) if it has setup NAT for this + connection */ + int (*expect)(struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info, + struct ip_conntrack *master, + struct ip_nat_info *masterinfo, + unsigned int *verdict); +}; + +extern int ip_nat_expect_register(struct ip_nat_expect *expect); +extern void ip_nat_expect_unregister(struct ip_nat_expect *expect); +extern int ip_nat_rule_init(void) __init; +extern void ip_nat_rule_cleanup(void); +extern int ip_nat_rule_find(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct ip_conntrack *ct, + struct ip_nat_info *info); +#endif +#endif /* _IP_NAT_RULE_H */ diff --git a/include/linux/netfilter_ipv4/ip_queue.h b/include/linux/netfilter_ipv4/ip_queue.h new file mode 100644 index 000000000000..8bbd6230fe85 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_queue.h @@ -0,0 +1,86 @@ +/* + * This is a module which is used for queueing IPv4 packets and + * communicating with userspace via netlink. + * + * (C) 2000 James Morris + */ +#ifndef _IP_QUEUE_H +#define _IP_QUEUE_H + +#ifdef __KERNEL__ +#ifdef DEBUG_IPQ +#define QDEBUG(x...) printk(KERN_DEBUG ## x) +#else +#define QDEBUG(x...) +#endif /* DEBUG_IPQ */ +#else +#include +#endif /* ! 
__KERNEL__ */ + +/* Messages sent from kernel */ +typedef struct ipq_packet_msg { + unsigned long packet_id; /* ID of queued packet */ + unsigned long mark; /* Netfilter mark value */ + long timestamp_sec; /* Packet arrival time (seconds) */ + long timestamp_usec; /* Packet arrvial time (+useconds) */ + unsigned int hook; /* Netfilter hook we rode in on */ + char indev_name[IFNAMSIZ]; /* Name of incoming interface */ + char outdev_name[IFNAMSIZ]; /* Name of outgoing interface */ + size_t data_len; /* Length of packet data */ + /* Optional packet data follows */ +} ipq_packet_msg_t; + +/* Messages sent from userspace */ +typedef struct ipq_mode_msg { + unsigned char value; /* Requested mode */ + size_t range; /* Optional range of packet requested */ +} ipq_mode_msg_t; + +typedef struct ipq_verdict_msg { + unsigned int value; /* Verdict to hand to netfilter */ + unsigned long id; /* Packet ID for this verdict */ + size_t data_len; /* Length of replacement data */ + /* Optional replacement data follows */ +} ipq_verdict_msg_t; + +typedef struct ipq_peer_msg { + union { + ipq_verdict_msg_t verdict; + ipq_mode_msg_t mode; + } msg; +} ipq_peer_msg_t; + +/* Each queued packet has one of these states */ +enum { + IPQ_PS_NEW, /* Newly arrived packet */ + IPQ_PS_WAITING, /* User has been notified of packet, + we're waiting for a verdict */ + IPQ_PS_VERDICT /* Packet has been assigned verdict, + waiting to be reinjected */ +}; +#define IPQ_PS_MAX IPQ_PS_VERDICT + +/* The queue operates in one of these states */ +enum { + IPQ_QS_HOLD, /* Hold all packets in queue */ + IPQ_QS_COPY, /* Copy metadata and/or packets to user */ + IPQ_QS_FLUSH /* Flush and drop all queue entries */ +}; +#define IPQ_QS_MAX IPQ_QS_FLUSH + +/* Modes requested by peer */ +enum { + IPQ_COPY_NONE, /* Copy nothing */ + IPQ_COPY_META, /* Copy metadata */ + IPQ_COPY_PACKET /* Copy metadata + packet (range) */ +}; +#define IPQ_COPY_MAX IPQ_COPY_PACKET + +/* Types of messages */ +#define IPQM_BASE 0x10 /* 
standard netlink messages below this */ +#define IPQM_MODE (IPQM_BASE + 1) /* Mode request from peer */ +#define IPQM_VERDICT (IPQM_BASE + 2) /* Verdict from peer */ +#define IPQM_PACKET (IPQM_BASE + 3) /* Packet from kernel */ +#define IPQM_MAX (IPQM_BASE + 4) + +#endif /*_IP_QUEUE_H*/ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h new file mode 100644 index 000000000000..ab35cbace501 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -0,0 +1,421 @@ +/* + * 25-Jul-1998 Major changes to allow for ip chain table + * + * 3-Jan-2000 Named tables to allow packet selection for different uses. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. + */ + +#ifndef _IPTABLES_H +#define _IPTABLES_H + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#endif +#include + +#define IPT_FUNCTION_MAXNAMELEN 32 +#define IPT_TABLE_MAXNAMELEN 32 + +/* Yes, Virginia, you have to zero the padding. 
*/ +struct ipt_ip { + /* Source and destination IP addr */ + struct in_addr src, dst; + /* Mask for src and dest IP addr */ + struct in_addr smsk, dmsk; + char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; + unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; + + /* Protocol, 0 = ANY */ + u_int16_t proto; + + /* Flags word */ + u_int8_t flags; + /* Inverse flags */ + u_int8_t invflags; +}; + +struct ipt_entry_match +{ + /* Total length */ + u_int16_t match_size; + union { + /* Used by userspace */ + char name[IPT_FUNCTION_MAXNAMELEN]; + /* Used inside the kernel */ + struct ipt_match *match; + } u; + + unsigned char data[0]; +}; + +struct ipt_entry_target +{ + /* Total length */ + u_int16_t target_size; + union { + /* Used by userspace */ + char name[IPT_FUNCTION_MAXNAMELEN]; + /* Used inside the kernel */ + struct ipt_target *target; + } u; + + unsigned char data[0]; +}; + +struct ipt_standard_target +{ + struct ipt_entry_target target; + int verdict; +}; + +struct ipt_counters +{ + u_int64_t pcnt, bcnt; /* Packet and byte counters */ +}; + +/* Values for "flag" field in struct ipt_ip (general ip structure). */ +#define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ +#define IPT_F_MASK 0x01 /* All possible flag bits mask. */ + +/* Values for "inv" field in struct ipt_ip. */ +#define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ +#define IPT_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ +#define IPT_INV_TOS 0x04 /* Invert the sense of TOS. */ +#define IPT_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ +#define IPT_INV_DSTIP 0x10 /* Invert the sense of DST OP. */ +#define IPT_INV_FRAG 0x20 /* Invert the sense of FRAG. */ +#define IPT_INV_PROTO 0x40 /* Invert the sense of PROTO. */ +#define IPT_INV_MASK 0x7F /* All possible flag bits mask. */ + +/* This structure defines each of the firewall rules. 
Consists of 3 + parts which are 1) general IP header stuff 2) match specific + stuff 3) the target to perform if the rule matches */ +struct ipt_entry +{ + struct ipt_ip ip; + + /* Mark with fields that we care about. */ + unsigned int nfcache; + + /* Size of ipt_entry + matches */ + u_int16_t target_offset; + /* Size of ipt_entry + matches + target */ + u_int16_t next_offset; + + /* Back pointer */ + unsigned int comefrom; + + /* Packet and byte counters. */ + struct ipt_counters counters; + + /* The matches (if any), then the target. */ + unsigned char elems[0]; +}; + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use a raw + * socket for this. Instead we check rights in the calls. */ +#define IPT_BASE_CTL 64 /* base for firewall socket options */ + +#define IPT_SO_SET_REPLACE (IPT_BASE_CTL) +#define IPT_SO_SET_ADD_COUNTERS (IPT_BASE_CTL + 1) +#define IPT_SO_SET_MAX IPT_SO_SET_ADD_COUNTERS + +#define IPT_SO_GET_INFO (IPT_BASE_CTL) +#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) +#define IPT_SO_GET_MAX IPT_SO_GET_ENTRIES + +/* CONTINUE verdict for targets */ +#define IPT_CONTINUE 0xFFFFFFFF + +/* For standard target */ +#define IPT_RETURN (-NF_MAX_VERDICT - 1) + +/* TCP matching stuff */ +struct ipt_tcp +{ + u_int16_t spts[2]; /* Source port range. */ + u_int16_t dpts[2]; /* Destination port range. */ + u_int8_t option; /* TCP Option iff non-zero*/ + u_int8_t flg_mask; /* TCP flags mask byte */ + u_int8_t flg_cmp; /* TCP flags compare byte */ + u_int8_t invflags; /* Inverse flags */ +}; + +/* Values for "inv" field in struct ipt_tcp. */ +#define IPT_TCP_INV_SRCPT 0x01 /* Invert the sense of source ports. */ +#define IPT_TCP_INV_DSTPT 0x02 /* Invert the sense of dest ports. */ +#define IPT_TCP_INV_FLAGS 0x04 /* Invert the sense of TCP flags. */ +#define IPT_TCP_INV_OPTION 0x08 /* Invert the sense of option test. */ +#define IPT_TCP_INV_MASK 0x0F /* All possible flags. 
*/ + +/* UDP matching stuff */ +struct ipt_udp +{ + u_int16_t spts[2]; /* Source port range. */ + u_int16_t dpts[2]; /* Destination port range. */ + u_int8_t invflags; /* Inverse flags */ +}; + +/* Values for "invflags" field in struct ipt_udp. */ +#define IPT_UDP_INV_SRCPT 0x01 /* Invert the sense of source ports. */ +#define IPT_UDP_INV_DSTPT 0x02 /* Invert the sense of dest ports. */ +#define IPT_UDP_INV_MASK 0x03 /* All possible flags. */ + +/* ICMP matching stuff */ +struct ipt_icmp +{ + u_int8_t type; /* type to match */ + u_int8_t code[2]; /* range of code */ + u_int8_t invflags; /* Inverse flags */ +}; + +/* Values for "inv" field for struct ipt_icmp. */ +#define IPT_ICMP_INV 0x01 /* Invert the sense of type/code test */ + +/* The argument to IPT_SO_GET_INFO */ +struct ipt_getinfo +{ + /* Which table: caller fills this in. */ + char name[IPT_TABLE_MAXNAMELEN]; + + /* Kernel fills these in. */ + /* Which hook entry points are valid: bitmask */ + unsigned int valid_hooks; + + /* Hook entry points: one per netfilter hook. */ + unsigned int hook_entry[NF_IP_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_IP_NUMHOOKS]; + + /* Number of entries */ + unsigned int num_entries; + + /* Size of entries. */ + unsigned int size; +}; + +/* The argument to IPT_SO_SET_REPLACE. */ +struct ipt_replace +{ + /* Which table. */ + char name[IPT_TABLE_MAXNAMELEN]; + + /* Which hook entry points are valid: bitmask. You can't + change this. */ + unsigned int valid_hooks; + + /* Number of entries */ + unsigned int num_entries; + + /* Total size of new entries */ + unsigned int size; + + /* Hook entry points. */ + unsigned int hook_entry[NF_IP_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_IP_NUMHOOKS]; + + /* Information about old entries: */ + /* Number of counters (must be equal to current number of entries). */ + unsigned int num_counters; + /* The old entries' counters. 
*/ + struct ipt_counters *counters; + + /* The entries (hang off end: not really an array). */ + struct ipt_entry entries[0]; +}; + +/* The argument to IPT_SO_ADD_COUNTERS. */ +struct ipt_counters_info +{ + /* Which table. */ + char name[IPT_TABLE_MAXNAMELEN]; + + unsigned int num_counters; + + /* The counters (actually `number' of these). */ + struct ipt_counters counters[0]; +}; + +/* The argument to IPT_SO_GET_ENTRIES. */ +struct ipt_get_entries +{ + /* Which table: user fills this in. */ + char name[IPT_TABLE_MAXNAMELEN]; + + /* User fills this in: total entry size. */ + unsigned int size; + + /* The entries. */ + unsigned char entries[0]; +}; + +/* Standard return verdict, or do jump. */ +#define IPT_STANDARD_TARGET "" +/* Error verdict. */ +#define IPT_ERROR_TARGET "ERROR" + +/* Helper functions */ +extern __inline__ struct ipt_entry_target * +ipt_get_target(struct ipt_entry *e) +{ + return (void *)e + e->target_offset; +} + +/* fn returns 0 to continue iteration */ +#define IPT_MATCH_ITERATE(e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct ipt_entry_match *__m; \ + \ + for (__i = sizeof(struct ipt_entry); \ + __i < (e)->target_offset; \ + __i += __m->match_size) { \ + __m = (void *)(e) + __i; \ + \ + __ret = fn(__m , ## args); \ + if (__ret != 0) \ + break; \ + } \ + __ret; \ +}) + +/* fn returns 0 to continue iteration */ +#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct ipt_entry *__e; \ + \ + for (__i = 0; __i < (size); __i += __e->next_offset) { \ + __e = (void *)(entries) + __i; \ + \ + __ret = fn(__e , ## args); \ + if (__ret != 0) \ + break; \ + } \ + __ret; \ +}) + +/* + * Main firewall chains definitions and global var's definitions. 
+ */ +#ifdef __KERNEL__ + +#include +#include +extern void ipt_init(void) __init; + +struct ipt_match +{ + struct list_head list; + + const char name[IPT_FUNCTION_MAXNAMELEN]; + + /* Return true or false: return FALSE and set *hotdrop = 1 to + force immediate packet drop. */ + int (*match)(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop); + + /* Called when user tries to insert an entry of this type. */ + /* Should return true or false. */ + int (*checkentry)(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask); + + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; +}; + +/* Registration hooks for targets. */ +struct ipt_target +{ + struct list_head list; + + const char name[IPT_FUNCTION_MAXNAMELEN]; + + /* Returns verdict. */ + unsigned int (*target)(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userdata); + + /* Called when user tries to insert an entry of this type: + hook_mask is a bitmask of hooks from which it can be + called. */ + /* Should return true or false. */ + int (*checkentry)(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask); + + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; +}; + +extern int ipt_register_target(struct ipt_target *target); +extern void ipt_unregister_target(struct ipt_target *target); + +extern int ipt_register_match(struct ipt_match *match); +extern void ipt_unregister_match(struct ipt_match *match); + +/* Furniture shopping... */ +struct ipt_table +{ + struct list_head list; + + /* A unique name... 
*/ + char name[IPT_TABLE_MAXNAMELEN]; + + /* Seed table: copied in register_table */ + struct ipt_replace *table; + + /* What hooks you will enter on */ + unsigned int valid_hooks; + + /* Lock for the curtain */ + rwlock_t lock; + + /* Man behind the curtain... */ + struct ipt_table_info *private; +}; + +extern int ipt_register_table(struct ipt_table *table); +extern void ipt_unregister_table(struct ipt_table *table); +extern unsigned int ipt_do_table(struct sk_buff **pskb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + struct ipt_table *table, + void *userdata); + +#define IPT_ALIGN(s) (((s) + (__alignof__(struct ipt_match)-1)) & ~(__alignof__(struct ipt_match)-1)) +#endif /*__KERNEL__*/ +#endif /* _IPTABLES_H */ diff --git a/include/linux/netfilter_ipv4/ipchains_core.h b/include/linux/netfilter_ipv4/ipchains_core.h new file mode 100644 index 000000000000..13546ff8d4fa --- /dev/null +++ b/include/linux/netfilter_ipv4/ipchains_core.h @@ -0,0 +1,193 @@ +/* + * This code is heavily based on the code in ip_fw.h; see that file for + * copyrights and attributions. This code is basically GPL. + * + * 15-Feb-1997: Major changes to allow graphs for firewall rules. + * Paul Russell and + * Michael Neuling + * 2-Nov-1997: Changed types to __u16, etc. + * Removed IP_FW_F_TCPACK & IP_FW_F_BIDIR. + * Added inverse flags field. + * Removed multiple port specs. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. 
+ */ + +#ifndef _IP_FWCHAINS_H +#define _IP_FWCHAINS_H + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#endif /* __KERNEL__ */ +#define IP_FW_MAX_LABEL_LENGTH 8 +typedef char ip_chainlabel[IP_FW_MAX_LABEL_LENGTH+1]; + +struct ip_fw +{ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */ + __u32 fw_mark; /* ID to stamp on packet */ + __u16 fw_proto; /* Protocol, 0 = ANY */ + __u16 fw_flg; /* Flags word */ + __u16 fw_invflg; /* Inverse flags */ + __u16 fw_spts[2]; /* Source port range. */ + __u16 fw_dpts[2]; /* Destination port range. */ + __u16 fw_redirpt; /* Port to redirect to. */ + __u16 fw_outputsize; /* Max amount to output to + NETLINK */ + char fw_vianame[IFNAMSIZ]; /* name of interface "via" */ + __u8 fw_tosand, fw_tosxor; /* Revised packet priority */ +}; + +struct ip_fwuser +{ + struct ip_fw ipfw; + ip_chainlabel label; +}; + +/* Values for "fw_flg" field . */ +#define IP_FW_F_PRN 0x0001 /* Print packet if it matches */ +#define IP_FW_F_TCPSYN 0x0002 /* For tcp packets-check SYN only */ +#define IP_FW_F_FRAG 0x0004 /* Set if rule is a fragment rule */ +#define IP_FW_F_MARKABS 0x0008 /* Set the mark to fw_mark, not add. */ +#define IP_FW_F_WILDIF 0x0010 /* Need only match start of interface name. */ +#define IP_FW_F_NETLINK 0x0020 /* Redirect to netlink: 2.1.x only */ +#define IP_FW_F_MASK 0x003F /* All possible flag bits mask */ + +/* Values for "fw_invflg" field. */ +#define IP_FW_INV_SRCIP 0x0001 /* Invert the sense of fw_src. */ +#define IP_FW_INV_DSTIP 0x0002 /* Invert the sense of fw_dst. */ +#define IP_FW_INV_PROTO 0x0004 /* Invert the sense of fw_proto. */ +#define IP_FW_INV_SRCPT 0x0008 /* Invert the sense of source ports. */ +#define IP_FW_INV_DSTPT 0x0010 /* Invert the sense of destination ports. */ +#define IP_FW_INV_VIA 0x0020 /* Invert the sense of fw_vianame. */ +#define IP_FW_INV_SYN 0x0040 /* Invert the sense of IP_FW_F_TCPSYN. 
*/ +#define IP_FW_INV_FRAG 0x0080 /* Invert the sense of IP_FW_F_FRAG. */ + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use + * a raw socket for this. Instead we check rights in the calls. */ + +#define IP_FW_BASE_CTL 64 /* base for firewall socket options */ + +#define IP_FW_APPEND (IP_FW_BASE_CTL) /* Takes ip_fwchange */ +#define IP_FW_REPLACE (IP_FW_BASE_CTL+1) /* Takes ip_fwnew */ +#define IP_FW_DELETE_NUM (IP_FW_BASE_CTL+2) /* Takes ip_fwdelnum */ +#define IP_FW_DELETE (IP_FW_BASE_CTL+3) /* Takes ip_fwchange */ +#define IP_FW_INSERT (IP_FW_BASE_CTL+4) /* Takes ip_fwnew */ +#define IP_FW_FLUSH (IP_FW_BASE_CTL+5) /* Takes ip_chainlabel */ +#define IP_FW_ZERO (IP_FW_BASE_CTL+6) /* Takes ip_chainlabel */ +#define IP_FW_CHECK (IP_FW_BASE_CTL+7) /* Takes ip_fwtest */ +#define IP_FW_MASQ_TIMEOUTS (IP_FW_BASE_CTL+8) /* Takes 3 ints */ +#define IP_FW_CREATECHAIN (IP_FW_BASE_CTL+9) /* Takes ip_chainlabel */ +#define IP_FW_DELETECHAIN (IP_FW_BASE_CTL+10) /* Takes ip_chainlabel */ +#define IP_FW_POLICY (IP_FW_BASE_CTL+11) /* Takes ip_fwpolicy */ +/* Masquerade control, only 1 optname */ + +#define IP_FW_MASQ_CTL (IP_FW_BASE_CTL+12) /* General ip_masq ctl */ + +/* Builtin chain labels */ +#define IP_FW_LABEL_FORWARD "forward" +#define IP_FW_LABEL_INPUT "input" +#define IP_FW_LABEL_OUTPUT "output" + +/* Special targets */ +#define IP_FW_LABEL_MASQUERADE "MASQ" +#define IP_FW_LABEL_REDIRECT "REDIRECT" +#define IP_FW_LABEL_ACCEPT "ACCEPT" +#define IP_FW_LABEL_BLOCK "DENY" +#define IP_FW_LABEL_REJECT "REJECT" +#define IP_FW_LABEL_RETURN "RETURN" +#define IP_FW_LABEL_QUEUE "QUEUE" + +/* Files in /proc/net */ +#define IP_FW_PROC_CHAINS "ip_fwchains" +#define IP_FW_PROC_CHAIN_NAMES "ip_fwnames" + + +struct ip_fwpkt +{ + struct iphdr fwp_iph; /* IP header */ + union { + struct tcphdr fwp_tcph; /* TCP header or */ + struct udphdr fwp_udph; /* UDP header */ + struct icmphdr fwp_icmph; /* ICMP 
header */ + } fwp_protoh; + struct in_addr fwp_via; /* interface address */ + char fwp_vianame[IFNAMSIZ]; /* interface name */ +}; + +/* The argument to IP_FW_DELETE and IP_FW_APPEND */ +struct ip_fwchange +{ + struct ip_fwuser fwc_rule; + ip_chainlabel fwc_label; +}; + +/* The argument to IP_FW_CHECK. */ +struct ip_fwtest +{ + struct ip_fwpkt fwt_packet; /* Packet to be tested */ + ip_chainlabel fwt_label; /* Block to start test in */ +}; + +/* The argument to IP_FW_DELETE_NUM */ +struct ip_fwdelnum +{ + __u32 fwd_rulenum; + ip_chainlabel fwd_label; +}; + +/* The argument to IP_FW_REPLACE and IP_FW_INSERT */ +struct ip_fwnew +{ + __u32 fwn_rulenum; + struct ip_fwuser fwn_rule; + ip_chainlabel fwn_label; +}; + +/* The argument to IP_FW_POLICY */ +struct ip_fwpolicy +{ + ip_chainlabel fwp_policy; + ip_chainlabel fwp_label; +}; +/* + * timeouts for ip masquerading + */ + +extern int ip_fw_masq_timeouts(void *, int); + + +/* + * Main firewall chains definitions and global var's definitions. + */ + +#ifdef __KERNEL__ + +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0) +#include +extern void ip_fw_init(void) __init; +#else /* 2.0.x */ +extern void ip_fw_init(void); +#endif /* 2.1.x */ +extern int ip_fw_ctl(int, void *, int); +#ifdef CONFIG_IP_MASQUERADE +extern int ip_masq_uctl(int, char *, int); +#endif +#endif /* KERNEL */ + +#endif /* _IP_FWCHAINS_H */ diff --git a/include/linux/netfilter_ipv4/ipfwadm_core.h b/include/linux/netfilter_ipv4/ipfwadm_core.h new file mode 100644 index 000000000000..8294efd7e513 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipfwadm_core.h @@ -0,0 +1,256 @@ +#ifndef _IPFWADM_CORE_H +#define _IPFWADM_CORE_H +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +/* + * IP firewalling code. This is taken from 4.4BSD. Please note the + * copyright message below. As per the GPL it must be maintained + * and the licenses thus do not conflict. 
While this port is subject + * to the GPL I also place my modifications under the original + * license in recognition of the original copyright. + * + * Ported from BSD to Linux, + * Alan Cox 22/Nov/1994. + * Merged and included the FreeBSD-Current changes at Ugen's request + * (but hey it's a lot cleaner now). Ugen would prefer in some ways + * we waited for his final product but since Linux 1.2.0 is about to + * appear it's not practical - Read: It works, it's not clean but please + * don't consider it to be his standard of finished work. + * Alan. + * + * Fixes: + * Pauline Middelink : Added masquerading. + * Jos Vos : Separate input and output firewall + * chains, new "insert" and "append" + * commands to replace "add" commands, + * add ICMP header to struct ip_fwpkt. + * Jos Vos : Add support for matching device names. + * Willy Konynenberg : Add transparent proxying support. + * Jos Vos : Add options for input/output accounting. + * + * All the real work was done by ..... + */ + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags and num_*_ports are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. 
+ */ + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#endif + +struct ip_fw +{ + struct ip_fw *fw_next; /* Next firewall on chain */ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */ + struct in_addr fw_via; /* IP address of interface "via" */ + struct net_device *fw_viadev; /* device of interface "via" */ + __u16 fw_flg; /* Flags word */ + __u16 fw_nsp, fw_ndp; /* N'of src ports and # of dst ports */ + /* in ports array (dst ports follow */ + /* src ports; max of 10 ports in all; */ + /* count of 0 means match all ports) */ +#define IP_FW_MAX_PORTS 10 /* A reasonable maximum */ + __u16 fw_pts[IP_FW_MAX_PORTS]; /* Array of port numbers to match */ + unsigned long fw_pcnt,fw_bcnt; /* Packet and byte counters */ + __u8 fw_tosand, fw_tosxor; /* Revised packet priority */ + char fw_vianame[IFNAMSIZ]; /* name of interface "via" */ +}; + +/* + * Values for "flags" field . + */ + +#define IP_FW_F_ALL 0x0000 /* This is a universal packet firewall*/ +#define IP_FW_F_TCP 0x0001 /* This is a TCP packet firewall */ +#define IP_FW_F_UDP 0x0002 /* This is a UDP packet firewall */ +#define IP_FW_F_ICMP 0x0003 /* This is a ICMP packet firewall */ +#define IP_FW_F_KIND 0x0003 /* Mask to isolate firewall kind */ +#define IP_FW_F_ACCEPT 0x0004 /* This is an accept firewall (as * + * opposed to a deny firewall)* + * */ +#define IP_FW_F_SRNG 0x0008 /* The first two src ports are a min * + * and max range (stored in host byte * + * order). * + * */ +#define IP_FW_F_DRNG 0x0010 /* The first two dst ports are a min * + * and max range (stored in host byte * + * order). 
* + * (ports[0] <= port <= ports[1]) * + * */ +#define IP_FW_F_PRN 0x0020 /* In verbose mode print this firewall*/ +#define IP_FW_F_BIDIR 0x0040 /* For bidirectional firewalls */ +#define IP_FW_F_TCPSYN 0x0080 /* For tcp packets-check SYN only */ +#define IP_FW_F_ICMPRPL 0x0100 /* Send back icmp unreachable packet */ +#define IP_FW_F_MASQ 0x0200 /* Masquerading */ +#define IP_FW_F_TCPACK 0x0400 /* For tcp-packets match if ACK is set*/ +#define IP_FW_F_REDIR 0x0800 /* Redirect to local port fw_pts[n] */ +#define IP_FW_F_ACCTIN 0x1000 /* Account incoming packets only. */ +#define IP_FW_F_ACCTOUT 0x2000 /* Account outgoing packets only. */ + +#define IP_FW_F_MASK 0x3FFF /* All possible flag bits mask */ + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use + * a raw socket for this. Instead we check rights in the calls. + */ + +#define IP_FW_BASE_CTL 64 /* base for firewall socket options */ + +#define IP_FW_COMMAND 0x00FF /* mask for command without chain */ +#define IP_FW_TYPE 0x0300 /* mask for type (chain) */ +#define IP_FW_SHIFT 8 /* shift count for type (chain) */ + +#define IP_FW_FWD 0 +#define IP_FW_IN 1 +#define IP_FW_OUT 2 +#define IP_FW_ACCT 3 +#define IP_FW_CHAINS 4 /* total number of ip_fw chains */ +#define IP_FW_MASQ 5 + +#define IP_FW_INSERT (IP_FW_BASE_CTL) +#define IP_FW_APPEND (IP_FW_BASE_CTL+1) +#define IP_FW_DELETE (IP_FW_BASE_CTL+2) +#define IP_FW_FLUSH (IP_FW_BASE_CTL+3) +#define IP_FW_ZERO (IP_FW_BASE_CTL+4) +#define IP_FW_POLICY (IP_FW_BASE_CTL+5) +#define IP_FW_CHECK (IP_FW_BASE_CTL+6) +#define IP_FW_MASQ_TIMEOUTS (IP_FW_BASE_CTL+7) + +#define IP_FW_INSERT_FWD (IP_FW_INSERT | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_APPEND_FWD (IP_FW_APPEND | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_DELETE_FWD (IP_FW_DELETE | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_FLUSH_FWD (IP_FW_FLUSH | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_ZERO_FWD (IP_FW_ZERO | (IP_FW_FWD << 
IP_FW_SHIFT)) +#define IP_FW_POLICY_FWD (IP_FW_POLICY | (IP_FW_FWD << IP_FW_SHIFT)) +#define IP_FW_CHECK_FWD (IP_FW_CHECK | (IP_FW_FWD << IP_FW_SHIFT)) + +#define IP_FW_INSERT_IN (IP_FW_INSERT | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_APPEND_IN (IP_FW_APPEND | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_DELETE_IN (IP_FW_DELETE | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_FLUSH_IN (IP_FW_FLUSH | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_ZERO_IN (IP_FW_ZERO | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_POLICY_IN (IP_FW_POLICY | (IP_FW_IN << IP_FW_SHIFT)) +#define IP_FW_CHECK_IN (IP_FW_CHECK | (IP_FW_IN << IP_FW_SHIFT)) + +#define IP_FW_INSERT_OUT (IP_FW_INSERT | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_APPEND_OUT (IP_FW_APPEND | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_DELETE_OUT (IP_FW_DELETE | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_FLUSH_OUT (IP_FW_FLUSH | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_ZERO_OUT (IP_FW_ZERO | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_POLICY_OUT (IP_FW_POLICY | (IP_FW_OUT << IP_FW_SHIFT)) +#define IP_FW_CHECK_OUT (IP_FW_CHECK | (IP_FW_OUT << IP_FW_SHIFT)) + +#define IP_ACCT_INSERT (IP_FW_INSERT | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_APPEND (IP_FW_APPEND | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_DELETE (IP_FW_DELETE | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_FLUSH (IP_FW_FLUSH | (IP_FW_ACCT << IP_FW_SHIFT)) +#define IP_ACCT_ZERO (IP_FW_ZERO | (IP_FW_ACCT << IP_FW_SHIFT)) + +#define IP_FW_MASQ_INSERT (IP_FW_INSERT | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_ADD (IP_FW_APPEND | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_DEL (IP_FW_DELETE | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_FLUSH (IP_FW_FLUSH | (IP_FW_MASQ << IP_FW_SHIFT)) + +#define IP_FW_MASQ_INSERT (IP_FW_INSERT | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_ADD (IP_FW_APPEND | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_DEL (IP_FW_DELETE | (IP_FW_MASQ << IP_FW_SHIFT)) +#define IP_FW_MASQ_FLUSH (IP_FW_FLUSH | (IP_FW_MASQ << 
IP_FW_SHIFT)) + +struct ip_fwpkt +{ + struct iphdr fwp_iph; /* IP header */ + union { + struct tcphdr fwp_tcph; /* TCP header or */ + struct udphdr fwp_udph; /* UDP header */ + struct icmphdr fwp_icmph; /* ICMP header */ + } fwp_protoh; + struct in_addr fwp_via; /* interface address */ + char fwp_vianame[IFNAMSIZ]; /* interface name */ +}; + +#define IP_FW_MASQCTL_MAX 256 +#define IP_MASQ_MOD_NMAX 32 + +struct ip_fw_masqctl +{ + int mctl_action; + union { + struct { + char name[IP_MASQ_MOD_NMAX]; + char data[1]; + } mod; + } u; +}; + +/* + * timeouts for ip masquerading + */ + +struct ip_fw_masq; + +/* + * Main firewall chains definitions and global var's definitions. + */ + +#ifdef __KERNEL__ + +/* Modes used in the ip_fw_chk() routine. */ +#define IP_FW_MODE_FW 0x00 /* kernel firewall check */ +#define IP_FW_MODE_ACCT_IN 0x01 /* accounting (incoming) */ +#define IP_FW_MODE_ACCT_OUT 0x02 /* accounting (outgoing) */ +#define IP_FW_MODE_CHK 0x04 /* check requested by user */ + +#include +#ifdef CONFIG_IP_FIREWALL +extern struct ip_fw *ip_fw_in_chain; +extern struct ip_fw *ip_fw_out_chain; +extern struct ip_fw *ip_fw_fwd_chain; +extern int ip_fw_in_policy; +extern int ip_fw_out_policy; +extern int ip_fw_fwd_policy; +extern int ip_fw_ctl(int, void *, int); +#endif +#ifdef CONFIG_IP_ACCT +extern struct ip_fw *ip_acct_chain; +extern int ip_acct_ctl(int, void *, int); +#endif +#ifdef CONFIG_IP_MASQUERADE +extern int ip_masq_ctl(int, void *, int); +#endif +#ifdef CONFIG_IP_MASQUERADE +extern int ip_masq_ctl(int, void *, int); +#endif + +extern int ip_fw_masq_timeouts(void *user, int len); + +extern int ip_fw_chk(struct iphdr *, struct net_device *, __u16 *, + struct ip_fw *, int, int); +#endif /* KERNEL */ +#endif /* _IP_FW_H */ diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h new file mode 100644 index 000000000000..481e12846208 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -0,0 +1,15 @@ +#ifndef _IPT_LOG_H 
+#define _IPT_LOG_H + +#define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ +#define IPT_LOG_IPOPT 0x04 /* Log IP options */ +#define IPT_LOG_MASK 0x07 + +struct ipt_log_info { + unsigned char level; + unsigned char logflags; + char prefix[30]; +}; + +#endif /*_IPT_LOG_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_MARK.h b/include/linux/netfilter_ipv4/ipt_MARK.h new file mode 100644 index 000000000000..cc57ae7a3416 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_MARK.h @@ -0,0 +1,8 @@ +#ifndef _IPT_MARK_H_target +#define _IPT_MARK_H_target + +struct ipt_mark_target_info { + unsigned long mark; +}; + +#endif /*_IPT_MARK_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_REJECT.h b/include/linux/netfilter_ipv4/ipt_REJECT.h new file mode 100644 index 000000000000..1ceebe21199c --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_REJECT.h @@ -0,0 +1,17 @@ +#ifndef _IPT_REJECT_H +#define _IPT_REJECT_H + +enum ipt_reject_with { + IPT_ICMP_NET_UNREACHABLE, + IPT_ICMP_HOST_UNREACHABLE, + IPT_ICMP_PROT_UNREACHABLE, + IPT_ICMP_PORT_UNREACHABLE, + IPT_ICMP_ECHOREPLY, + IPT_TCP_RESET, +}; + +struct ipt_reject_info { + enum ipt_reject_with with; /* reject type */ +}; + +#endif /*_IPT_REJECT_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_TOS.h b/include/linux/netfilter_ipv4/ipt_TOS.h new file mode 100644 index 000000000000..6bf9e1fdfd88 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_TOS.h @@ -0,0 +1,12 @@ +#ifndef _IPT_TOS_H_target +#define _IPT_TOS_H_target + +#ifndef IPTOS_NORMALSVC +#define IPTOS_NORMALSVC 0 +#endif + +struct ipt_tos_target_info { + u_int8_t tos; +}; + +#endif /*_IPT_TOS_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_limit.h b/include/linux/netfilter_ipv4/ipt_limit.h new file mode 100644 index 000000000000..256453409e21 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_limit.h @@ -0,0 +1,21 @@ +#ifndef _IPT_RATE_H +#define _IPT_RATE_H + +/* timings are in 
milliseconds. */ +#define IPT_LIMIT_SCALE 10000 + +/* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 + seconds, or one every 59 hours. */ +struct ipt_rateinfo { + u_int32_t avg; /* Average secs between packets * scale */ + u_int32_t burst; /* Period multiplier for upper limit. */ + + /* Used internally by the kernel */ + unsigned long prev; + u_int32_t credit; + u_int32_t credit_cap, cost; + + /* Ugly, ugly fucker. */ + struct ipt_rateinfo *master; +}; +#endif /*_IPT_RATE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_mac.h b/include/linux/netfilter_ipv4/ipt_mac.h new file mode 100644 index 000000000000..f8d5b8e7ccdb --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_mac.h @@ -0,0 +1,8 @@ +#ifndef _IPT_MAC_H +#define _IPT_MAC_H + +struct ipt_mac_info { + unsigned char srcaddr[ETH_ALEN]; + int invert; +}; +#endif /*_IPT_MAC_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_mark.h b/include/linux/netfilter_ipv4/ipt_mark.h new file mode 100644 index 000000000000..f3952b563d4c --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_mark.h @@ -0,0 +1,9 @@ +#ifndef _IPT_MARK_H +#define _IPT_MARK_H + +struct ipt_mark_info { + unsigned long mark, mask; + u_int8_t invert; +}; + +#endif /*_IPT_MARK_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_multiport.h b/include/linux/netfilter_ipv4/ipt_multiport.h new file mode 100644 index 000000000000..f6e50ae92793 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_multiport.h @@ -0,0 +1,21 @@ +#ifndef _IPT_MULTIPORT_H +#define _IPT_MULTIPORT_H +#include + +enum ipt_multiport_flags +{ + IPT_MULTIPORT_SOURCE, + IPT_MULTIPORT_DESTINATION, + IPT_MULTIPORT_EITHER +}; + +#define IPT_MULTI_PORTS 15 + +/* Must fit inside union ipt_matchinfo: 16 bytes */ +struct ipt_multiport +{ + u_int8_t flags; /* Type of comparison */ + u_int8_t count; /* Number of ports */ + u_int16_t ports[IPT_MULTI_PORTS]; /* Ports */ +}; +#endif /*_IPT_MULTIPORT_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_owner.h 
b/include/linux/netfilter_ipv4/ipt_owner.h new file mode 100644 index 000000000000..b014ef8b378b --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_owner.h @@ -0,0 +1,18 @@ +#ifndef _IPT_OWNER_H +#define _IPT_OWNER_H + +/* match and invert flags */ +#define IPT_OWNER_UID 0x01 +#define IPT_OWNER_GID 0x02 +#define IPT_OWNER_PID 0x04 +#define IPT_OWNER_SID 0x08 + +struct ipt_owner_info { + uid_t uid; + gid_t gid; + pid_t pid; + pid_t sid; + u_int8_t match, invert; /* flags */ +}; + +#endif /*_IPT_OWNER_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_state.h b/include/linux/netfilter_ipv4/ipt_state.h new file mode 100644 index 000000000000..ad11d316a644 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_state.h @@ -0,0 +1,12 @@ +#ifndef _IPT_STATE_H +#define _IPT_STATE_H + +#define _IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)+1)) +#define IPT_STATE_BIT(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? _IPT_STATE_BIT((ctinfo)-IP_CT_IS_REPLY) : _IPT_STATE_BIT(ctinfo)) +#define IPT_STATE_INVALID (1 << 0) + +struct ipt_state_info +{ + unsigned int statemask; +}; +#endif /*_IPT_STATE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_tos.h b/include/linux/netfilter_ipv4/ipt_tos.h new file mode 100644 index 000000000000..a21f5df23c50 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_tos.h @@ -0,0 +1,13 @@ +#ifndef _IPT_TOS_H +#define _IPT_TOS_H + +struct ipt_tos_info { + u_int8_t tos; + u_int8_t invert; +}; + +#ifndef IPTOS_NORMALSVC +#define IPTOS_NORMALSVC 0 +#endif + +#endif /*_IPT_TOS_H*/ diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h new file mode 100644 index 000000000000..ac6143b46025 --- /dev/null +++ b/include/linux/netfilter_ipv4/listhelp.h @@ -0,0 +1,114 @@ +#ifndef _LISTHELP_H +#define _LISTHELP_H +#include +#include + +/* Header to do more comprehensive job than linux/list.h; assume list + is first entry in structure. */ + +/* Return pointer to first true entry, if any, or NULL. A macro + required to allow inlining of cmpfn. 
*/ +#define LIST_FIND(head, cmpfn, type, args...) \ +({ \ + const struct list_head *__i = (head); \ + \ + ASSERT_READ_LOCK(head); \ + do { \ + __i = __i->next; \ + if (__i == (head)) { \ + __i = NULL; \ + break; \ + } \ + } while (!cmpfn((const type)__i , ## args)); \ + (type)__i; \ +}) + +#define LIST_FIND_W(head, cmpfn, type, args...) \ +({ \ + const struct list_head *__i = (head); \ + \ + ASSERT_WRITE_LOCK(head); \ + do { \ + __i = __i->next; \ + if (__i == (head)) { \ + __i = NULL; \ + break; \ + } \ + } while (!cmpfn((type)__i , ## args)); \ + (type)__i; \ +}) + +extern inline int +__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; } + +/* Is this entry in the list? */ +extern inline int +list_inlist(struct list_head *head, const void *entry) +{ + return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL; +} + +/* Delete from list. */ +#ifdef CONFIG_NETFILTER_DEBUG +#define LIST_DELETE(head, oldentry) \ +do { \ + ASSERT_WRITE_LOCK(head); \ + if (!list_inlist(head, oldentry)) \ + printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n", \ + __FILE__, __LINE__, #oldentry, oldentry, #head); \ + else list_del((struct list_head *)oldentry); \ +} while(0) +#else +#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry) +#endif + +/* Append. */ +extern inline void +list_append(struct list_head *head, void *new) +{ + ASSERT_WRITE_LOCK(head); + list_add((new), (head)->prev); +} + +/* Prepend. */ +extern inline void +list_prepend(struct list_head *head, void *new) +{ + ASSERT_WRITE_LOCK(head); + list_add(new, head); +} + +/* Insert according to ordering function; insert before first true. 
*/ +#define LIST_INSERT(head, new, cmpfn) \ +do { \ + struct list_head *__i; \ + ASSERT_WRITE_LOCK(head); \ + for (__i = (head)->next; \ + !cmpfn((new), (typeof (new))__i) && __i != (head); \ + __i = __i->next); \ + list_add((struct list_head *)(new), __i->prev); \ +} while(0) + +/* If the field after the list_head is a nul-terminated string, you + can use these functions. */ +extern inline int __list_cmp_name(const void *i, const char *name) +{ + return strcmp(name, i+sizeof(struct list_head)) == 0; +} + +/* Returns false if same name already in list, otherwise does insert. */ +extern inline int +list_named_insert(struct list_head *head, void *new) +{ + if (LIST_FIND(head, __list_cmp_name, void *, + new + sizeof(struct list_head))) + return 0; + list_prepend(head, new); + return 1; +} + +/* Find this named element in the list. */ +#define list_named_find(head, name) \ +LIST_FIND(head, __list_cmp_name, void *, name) + +#endif /*_LISTHELP_H*/ diff --git a/include/linux/netfilter_ipv4/lockhelp.h b/include/linux/netfilter_ipv4/lockhelp.h new file mode 100644 index 000000000000..89dd63f9f82f --- /dev/null +++ b/include/linux/netfilter_ipv4/lockhelp.h @@ -0,0 +1,129 @@ +#ifndef _LOCKHELP_H +#define _LOCKHELP_H +#include + +#include +#include +#include +#include + +/* Header to do help in lock debugging. 
*/ + +#ifdef CONFIG_NETFILTER_DEBUG +struct spinlock_debug +{ + spinlock_t l; + atomic_t locked_by; +}; + +struct rwlock_debug +{ + rwlock_t l; + int read_locked_map; + int write_locked_map; +}; + +#define DECLARE_LOCK(l) \ +struct spinlock_debug l = { SPIN_LOCK_UNLOCKED, ATOMIC_INIT(-1) } +#define DECLARE_LOCK_EXTERN(l) \ +extern struct spinlock_debug l +#define DECLARE_RWLOCK(l) \ +struct rwlock_debug l = { RW_LOCK_UNLOCKED, 0, 0 } +#define DECLARE_RWLOCK_EXTERN(l) \ +extern struct rwlock_debug l + +#define MUST_BE_LOCKED(l) \ +do { if (atomic_read(&(l)->locked_by) != smp_processor_id()) \ + printk("ASSERT %s:%u %s unlocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define MUST_BE_UNLOCKED(l) \ +do { if (atomic_read(&(l)->locked_by) == smp_processor_id()) \ + printk("ASSERT %s:%u %s locked\n", __FILE__, __LINE__, #l); \ +} while(0) + +/* Write locked OK as well. */ \ +#define MUST_BE_READ_LOCKED(l) \ +do { if (!((l)->read_locked_map & (1 << smp_processor_id())) \ + && !((l)->write_locked_map & (1 << smp_processor_id()))) \ + printk("ASSERT %s:%u %s not readlocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define MUST_BE_WRITE_LOCKED(l) \ +do { if (!((l)->write_locked_map & (1 << smp_processor_id()))) \ + printk("ASSERT %s:%u %s not writelocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define MUST_BE_READ_WRITE_UNLOCKED(l) \ +do { if ((l)->read_locked_map & (1 << smp_processor_id())) \ + printk("ASSERT %s:%u %s readlocked\n", __FILE__, __LINE__, #l); \ + else if ((l)->write_locked_map & (1 << smp_processor_id())) \ + printk("ASSERT %s:%u %s writelocked\n", __FILE__, __LINE__, #l); \ +} while(0) + +#define LOCK_BH(lk) \ +do { \ + MUST_BE_UNLOCKED(lk); \ + spin_lock_bh(&(lk)->l); \ + atomic_set(&(lk)->locked_by, smp_processor_id()); \ +} while(0) + +#define UNLOCK_BH(lk) \ +do { \ + MUST_BE_LOCKED(lk); \ + atomic_set(&(lk)->locked_by, -1); \ + spin_unlock_bh(&(lk)->l); \ +} while(0) + +#define READ_LOCK(lk) \ +do { \ + MUST_BE_READ_WRITE_UNLOCKED(lk); \ 
+ read_lock_bh(&(lk)->l); \ + set_bit(smp_processor_id(), &(lk)->read_locked_map); \ +} while(0) + +#define WRITE_LOCK(lk) \ +do { \ + MUST_BE_READ_WRITE_UNLOCKED(lk); \ + write_lock_bh(&(lk)->l); \ + set_bit(smp_processor_id(), &(lk)->write_locked_map); \ +} while(0) + +#define READ_UNLOCK(lk) \ +do { \ + if (!((lk)->read_locked_map & (1 << smp_processor_id()))) \ + printk("ASSERT: %s:%u %s not readlocked\n", \ + __FILE__, __LINE__, #lk); \ + clear_bit(smp_processor_id(), &(lk)->read_locked_map); \ + read_unlock_bh(&(lk)->l); \ +} while(0) + +#define WRITE_UNLOCK(lk) \ +do { \ + MUST_BE_WRITE_LOCKED(lk); \ + clear_bit(smp_processor_id(), &(lk)->write_locked_map); \ + write_unlock_bh(&(lk)->l); \ +} while(0) + +#else +#define DECLARE_LOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED +#define DECLARE_LOCK_EXTERN(l) extern spinlock_t l +#define DECLARE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED +#define DECLARE_RWLOCK_EXTERN(l) extern rwlock_t l + +#define MUST_BE_LOCKED(l) +#define MUST_BE_UNLOCKED(l) +#define MUST_BE_READ_LOCKED(l) +#define MUST_BE_WRITE_LOCKED(l) +#define MUST_BE_READ_WRITE_UNLOCKED(l) + +#define LOCK_BH(l) spin_lock_bh(l) +#define UNLOCK_BH(l) spin_unlock_bh(l) + +#define READ_LOCK(l) read_lock_bh(l) +#define WRITE_LOCK(l) write_lock_bh(l) +#define READ_UNLOCK(l) read_unlock_bh(l) +#define WRITE_UNLOCK(l) write_unlock_bh(l) +#endif /*CONFIG_NETFILTER_DEBUG*/ + +#endif /* _LOCKHELP_H */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index a2f4897fc6f3..42663e79b86b 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -172,6 +172,9 @@ void nfsd_lockd_unexport(struct svc_client *); #define nfserr_badtype __constant_htonl(NFSERR_BADTYPE) #define nfserr_jukebox __constant_htonl(NFSERR_JUKEBOX) +/* Check for dir entries '.' and '..' */ +#define isdotent(n, l) (l < 3 && n[0] == '.' 
&& (l == 1 || n[1] == '.')) + /* * Time of server startup */ diff --git a/include/linux/shm.h b/include/linux/shm.h index c1ab5240b9b9..bc56c5e20ba0 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -11,8 +11,8 @@ #define SHMMAX 0x2000000 /* max shared seg size (bytes) */ #define SHMMIN 0 /* min shared seg size (bytes) */ -#define SHMMNI 128 /* max num of segs system wide */ -#define SHMALL (SHMMAX/PAGE_SIZE*SHMMNI) /* max shm system wide (pages) */ +#define SHMMNI 4096 /* max num of segs system wide */ +#define SHMALL (SHMMAX/PAGE_SIZE*(SHMMNI/16)) /* max shm system wide (pages) */ #define SHMSEG SHMMNI /* max shared segs per process */ #include diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6081b089052e..429089cc5c5c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -122,8 +122,6 @@ struct sk_buff { #ifdef CONFIG_NETFILTER /* Can be used for communication between hooks. */ unsigned long nfmark; - /* Reason for doing this to the packet (see netfilter.h) */ - __u32 nfreason; /* Cache info */ __u32 nfcache; /* Associated connection, if any */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 569b8b6ea1e5..dacdfa2a741e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -669,6 +669,8 @@ extern void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); +extern void tcp_v4_send_reset(struct sk_buff *skb); + extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); diff --git a/ipc/shm.c b/ipc/shm.c index 2a89ce4b5015..4bd7834cc2ad 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -67,6 +67,7 @@ struct shmid_kernel /* private to the kernel */ unsigned long shm_npages; /* size of segment (pages) */ pte_t **shm_dir; /* ptr to arr of ptrs to frames */ int id; + int destroyed; /* set if the final detach kills */ union permap { struct shmem { time_t atime; @@ -101,7 +102,6 @@ static struct ipc_ids shm_ids; #define shm_lockall() ipc_lockall(&shm_ids) #define shm_unlockall() 
ipc_unlockall(&shm_ids) #define shm_get(id) ((struct shmid_kernel*)ipc_get(&shm_ids,id)) -#define shm_rmid(id) ((struct shmid_kernel*)ipc_rmid(&shm_ids,id)) #define shm_checkid(s, id) \ ipc_checkid(&shm_ids,&s->shm_perm,id) #define shm_buildid(id, seq) \ @@ -111,6 +111,7 @@ static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t static void killseg_core(struct shmid_kernel *shp, int doacc); static void shm_open (struct vm_area_struct *shmd); static void shm_close (struct vm_area_struct *shmd); +static void shm_remove_name(int id); static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int); static int shm_swapout(struct page *, struct file *); #ifdef CONFIG_PROC_FS @@ -262,7 +263,7 @@ static struct super_block *shm_read_super(struct super_block *s,void *data, struct inode * root_inode; if (shm_sb) { - printk ("shm fs already mounted\n"); + printk(KERN_ERR "shm fs already mounted\n"); return NULL; } @@ -270,7 +271,7 @@ static struct super_block *shm_read_super(struct super_block *s,void *data, shm_ctlmni = SHMMNI; shm_mode = S_IRWXUGO | S_ISVTX; if (shm_parse_options (data)) { - printk ("shm fs invalid option\n"); + printk(KERN_ERR "shm fs invalid option\n"); goto out_unlock; } @@ -293,7 +294,7 @@ static struct super_block *shm_read_super(struct super_block *s,void *data, return s; out_no_root: - printk("proc_read_super: get root inode failed\n"); + printk(KERN_ERR "proc_read_super: get root inode failed\n"); iput(root_inode); out_unlock: return NULL; @@ -306,6 +307,16 @@ static int shm_remount_fs (struct super_block *sb, int *flags, char *data) return 0; } +static inline struct shmid_kernel *shm_rmid(int id) +{ + return (struct shmid_kernel *)ipc_rmid(&shm_ids,id); +} + +static __inline__ int shm_addid(struct shmid_kernel *shp) +{ + return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1); +} + static void shm_put_super(struct super_block *sb) { struct super_block **p = &shm_sb; @@ -325,7 +336,7 @@ static void 
shm_put_super(struct super_block *sb) if (!(shp = shm_lock (i))) continue; if (shp->shm_nattch) - printk ("shm_nattch = %ld\n", shp->shm_nattch); + printk(KERN_DEBUG "shm_nattch = %ld\n", shp->shm_nattch); shp = shm_rmid(i); shm_unlock(i); killseg_core(shp, 1); @@ -661,12 +672,13 @@ static int newseg (key_t key, const char *name, int namelen, if (size > shm_ctlmax) return -EINVAL; + if (shm_tot + numpages >= shm_ctlall) return -ENOSPC; if (!(shp = newseg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1))) return -ENOMEM; - id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1); + id = shm_addid(shp); if(id == -1) { shm_free(shp->shm_dir,numpages); kfree(shp); @@ -1002,15 +1014,49 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) } case IPC_RMID: { - char name[SHM_FMT_LEN+1]; - if ((shmid % SEQ_MULTIPLIER)== zero_id) + /* + * We cannot simply remove the file. The SVID states + * that the block remains until the last person + * detaches from it, then is deleted. A shmat() on + * an RMID segment is legal in older Linux and if + * we change it apps break... + * + * Instead we set a destroyed flag, and then blow + * the name away when the usage hits zero. + */ + if ((shmid % SEQ_MULTIPLIER) == zero_id) return -EINVAL; - sprintf (name, SHM_FMT, shmid); lock_kernel(); - err = do_unlink (name, dget(shm_sb->s_root)); + down(&shm_ids.sem); + shp = shm_lock(shmid); + if (shp == NULL) { + unlock_kernel(); + return -EINVAL; + } + err = -EIDRM; + if (shm_checkid(shp, shmid) == 0) { + if (shp->shm_nattch == 0) { + int id=shp->id; + shm_unlock(shmid); + /* The kernel lock prevents new attaches from + * being happening. We can't hold shm_lock here + * else we will deadlock in shm_lookup when we + * try to recursively grab it. 
+ */ + shm_remove_name(id); + } else { + /* Do not find me any more */ + shp->destroyed = 1; + shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */ + /* Unlock */ + shm_unlock(shmid); + } + err = 0; + } else { + shm_unlock(shmid); + } + up(&shm_ids.sem); unlock_kernel(); - if (err == -ENOENT) - err = -EINVAL; return err; } @@ -1136,6 +1182,19 @@ static void shm_open (struct vm_area_struct *shmd) shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino); } +/* + * Remove a name. Must be called with lock_kernel + */ + +static void shm_remove_name(int id) +{ + char name[SHM_FMT_LEN+1]; + sprintf (name, SHM_FMT, id); + if(do_unlink (name, dget(shm_sb->s_root))) + printk(KERN_ERR "Unlink of SHM object '%s' failed.\n", + name); +} + /* * remove the attach descriptor shmd. * free memory for segment if it is marked destroyed. @@ -1147,13 +1206,30 @@ static void shm_close (struct vm_area_struct *shmd) int id = shmd->vm_file->f_dentry->d_inode->i_ino; struct shmid_kernel *shp; + lock_kernel(); + /* remove from the list of attaches of the shm segment */ if(!(shp = shm_lock(id))) BUG(); shp->shm_lprid = current->pid; shp->shm_dtim = CURRENT_TIME; shp->shm_nattch--; - shm_unlock(id); + if(shp->shm_nattch == 0 && shp->destroyed) { + int pid=shp->id; + shp->destroyed = 0; + shm_unlock(id); + + /* The kernel lock prevents new attaches from + * being happening. We can't hold shm_lock here + * else we will deadlock in shm_lookup when we + * try to recursively grab it. 
+ */ + shm_remove_name(pid); + } else { + shm_unlock(id); + } + + unlock_kernel(); } /* diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 64a9a8a6df41..b6f94f55f2c1 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -141,6 +141,7 @@ EXPORT_SYMBOL(iget4); EXPORT_SYMBOL(iput); EXPORT_SYMBOL(__namei); EXPORT_SYMBOL(lookup_dentry); +EXPORT_SYMBOL(lookup_one); EXPORT_SYMBOL(__open_namei); EXPORT_SYMBOL(sys_close); EXPORT_SYMBOL(d_alloc_root); @@ -505,3 +506,6 @@ EXPORT_SYMBOL(tasklet_kill); /* init task, for moving kthread roots - ought to export a function ?? */ EXPORT_SYMBOL(init_task_union); + +EXPORT_SYMBOL(tasklist_lock); +EXPORT_SYMBOL(pidhash); diff --git a/mm/filemap.c b/mm/filemap.c index 124ddfae3f4d..1b9f9ed6ae76 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -108,14 +108,15 @@ void invalidate_inode_pages(struct inode * inode) curr = curr->next; /* We cannot invalidate a locked page */ - if (PageLocked(page)) + if (TryLockPage(page)) continue; lru_cache_del(page); - remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); page->mapping = NULL; + UnlockPage(page); + page_cache_release(page); } spin_unlock(&pagecache_lock); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0204cf1412d8..1205ab835f3a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -58,6 +58,8 @@ static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, }; */ #define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size)) +#if 0 + static inline unsigned long classfree(zone_t *zone) { unsigned long free = 0; @@ -71,6 +73,8 @@ static inline unsigned long classfree(zone_t *zone) return(free); } +#endif + /* * Buddy system. Hairy. 
You really aren't expected to understand this * @@ -148,8 +152,10 @@ void __free_pages_ok (struct page *page, unsigned long order) spin_unlock_irqrestore(&zone->lock, flags); - if (classfree(zone) > zone->pages_high) + if (zone->free_pages > zone->pages_high) { zone->zone_wake_kswapd = 0; + zone->low_on_memory = 0; + } } #define MARK_USED(index, order, area) \ @@ -176,7 +182,8 @@ static inline struct page * expand (zone_t *zone, struct page *page, return page; } -static inline struct page * rmqueue (zone_t *zone, unsigned long order) +static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned long order)); +static struct page * rmqueue(zone_t *zone, unsigned long order) { free_area_t * area = zone->free_area + order; unsigned long curr_order = order; @@ -216,19 +223,43 @@ static inline struct page * rmqueue (zone_t *zone, unsigned long order) return NULL; } -static inline int zone_balance_memory (zone_t *zone, int gfp_mask) +static int zone_balance_memory(zonelist_t *zonelist) { - int freed; + int tried = 0, freed = 0; + zone_t **zone; + int gfp_mask = zonelist->gfp_mask; + extern wait_queue_head_t kswapd_wait; - /* - * In the atomic allocation case we only 'kick' the - * state machine, but do not try to free pages - * ourselves. - */ - freed = try_to_free_pages(gfp_mask, zone); + zone = zonelist->zones; + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; + if (z->free_pages > z->pages_low) + continue; - if (!freed && !(gfp_mask & __GFP_HIGH)) - return 0; + z->zone_wake_kswapd = 1; + wake_up_interruptible(&kswapd_wait); + + /* Are we reaching the critical stage? */ + if (!z->low_on_memory) { + /* Not yet critical, so let kswapd handle it.. */ + if (z->free_pages > z->pages_min) + continue; + z->low_on_memory = 1; + } + /* + * In the atomic allocation case we only 'kick' the + * state machine, but do not try to free pages + * ourselves. 
+ */ + tried = 1; + freed |= try_to_free_pages(gfp_mask, z); + } + if (tried && !freed) { + if (!(gfp_mask & __GFP_HIGH)) + return 0; + } return 1; } @@ -237,9 +268,7 @@ static inline int zone_balance_memory (zone_t *zone, int gfp_mask) */ struct page * __alloc_pages (zonelist_t *zonelist, unsigned long order) { - zone_t **zone, *z; - struct page *page; - int gfp_mask; + zone_t **zone = zonelist->zones; /* * (If anyone calls gfp from interrupts nonatomically then it @@ -248,10 +277,8 @@ struct page * __alloc_pages (zonelist_t *zonelist, unsigned long order) * We are falling back to lower-level zones if allocation * in a higher zone fails. */ - zone = zonelist->zones; - gfp_mask = zonelist->gfp_mask; for (;;) { - z = *(zone++); + zone_t *z = *(zone++); if (!z) break; if (!z->size) @@ -261,23 +288,10 @@ struct page * __alloc_pages (zonelist_t *zonelist, unsigned long order) * do our best to just allocate things without * further thought. */ - if (!(current->flags & PF_MEMALLOC)) - { - unsigned long free = classfree(z); - - if (free <= z->pages_high) - { - extern wait_queue_head_t kswapd_wait; - - z->zone_wake_kswapd = 1; - wake_up_interruptible(&kswapd_wait); - - if (free <= z->pages_min) - z->low_on_memory = 1; - - if (z->low_on_memory) - goto balance; - } + if (!(current->flags & PF_MEMALLOC)) { + /* Are we low on memory? */ + if (z->free_pages <= z->pages_low) + continue; } /* * This is an optimization for the 'higher order zone @@ -287,24 +301,30 @@ struct page * __alloc_pages (zonelist_t *zonelist, unsigned long order) * we do not take the spinlock and it's not exact for * the higher order case, but will do it for most things.) 
*/ -ready: if (z->free_pages) { - page = rmqueue(z, order); + struct page *page = rmqueue(z, order); if (page) return page; } } - -nopage: + if (zone_balance_memory(zonelist)) { + zone = zonelist->zones; + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; + if (z->free_pages) { + struct page *page = rmqueue(z, order); + if (page) + return page; + } + } + } return NULL; /* * The main chunk of the balancing code is in this offline branch: */ -balance: - if (!zone_balance_memory(z, gfp_mask)) - goto nopage; - goto ready; } /* @@ -549,7 +569,7 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap, zone->offset = offset; cumulative += size; - mask = (cumulative / zone_balance_ratio[j]); + mask = (size / zone_balance_ratio[j]); if (mask < zone_balance_min[j]) mask = zone_balance_min[j]; else if (mask > zone_balance_max[j]) diff --git a/net/Config.in b/net/Config.in index 624885478fa2..ce5b6faa9411 100644 --- a/net/Config.in +++ b/net/Config.in @@ -13,9 +13,9 @@ if [ "$CONFIG_NETLINK" = "y" ]; then tristate ' Netlink device emulation' CONFIG_NETLINK_DEV fi bool 'Network packet filtering (replaces ipchains)' CONFIG_NETFILTER -if [ "$CONFIG_NETFILTER" = "y" ]; then - bool ' Network packet filtering debugging' CONFIG_NETFILTER_DEBUG -fi +#if [ "$CONFIG_NETFILTER" = "y" ]; then +# bool ' Network packet filtering debugging' CONFIG_NETFILTER_DEBUG +#fi bool 'Socket Filtering' CONFIG_FILTER tristate 'Unix domain sockets' CONFIG_UNIX bool 'TCP/IP networking' CONFIG_INET diff --git a/net/Makefile b/net/Makefile index bf234eae145a..44b34d799b14 100644 --- a/net/Makefile +++ b/net/Makefile @@ -20,6 +20,10 @@ endif ifeq ($(CONFIG_INET),y) SUB_DIRS += ipv4 +ifeq ($(CONFIG_NETFILTER),y) +SUB_DIRS += ipv4/netfilter +MOD_SUB_DIRS += ipv4/netfilter +endif endif ifeq ($(CONFIG_UNIX),y) @@ -198,7 +202,7 @@ endif endif L_TARGET := network.a -L_OBJS := $(SOCK) protocols.o $(join $(SUB_DIRS),$(SUB_DIRS:%=/%.o)) +L_OBJS := $(SOCK) protocols.o $(join $(SUB_DIRS), 
$(patsubst %,/%.o,$(notdir $(SUB_DIRS)))) M_OBJS := diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 18f697755aac..02c3bc9892fa 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -4,9 +4,10 @@ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any * way. * - * Rusty Russell (C)1998 -- This code is GPL. + * Rusty Russell (C)2000 -- This code is GPL. * * February 2000: Modified by James Morris to have 1 queue per protocol. + * 15-Mar-2000: Added NF_REPEAT --RR. */ #include #include @@ -56,8 +57,6 @@ int nf_register_hook(struct nf_hook_ops *reg) { struct list_head *i; - NFDEBUG("nf_register_hook: pf=%i hook=%u.\n", reg->pf, reg->hooknum); - br_write_lock_bh(BR_NETPROTO_LOCK); for (i = nf_hooks[reg->pf][reg->hooknum].next; i != &nf_hooks[reg->pf][reg->hooknum]; @@ -119,7 +118,16 @@ out: void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { /* No point being interruptible: we're probably in cleanup_module() */ + restart: down(&nf_sockopt_mutex); + if (reg->use != 0) { + /* To be woken by nf_sockopt call... 
*/ + reg->cleanup_task = current; + up(&nf_sockopt_mutex); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + goto restart; + } list_del(®->list); up(&nf_sockopt_mutex); } @@ -178,7 +186,7 @@ void nf_dump_skb(int pf, struct sk_buff *skb) dst_port = ntohs(tcp->dest); } - printk("PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu" + printk("PROTO=%d %d.%d.%d.%d:%hu %d.%d.%d.%d:%hu" " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", ip->protocol, (ntohl(ip->saddr)>>24)&0xFF, @@ -261,9 +269,16 @@ void nf_debug_ip_finish_output2(struct sk_buff *skb) if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD) | (1 << NF_IP_POST_ROUTING))) { - printk("ip_finish_output: bad unowned skb = %p: ",skb); - debug_print_hooks_ip(skb->nf_debug); - nf_dump_skb(PF_INET, skb); + /* Fragments will have no owners, but still + may be local */ + if (!(skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) + || skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))){ + printk("ip_finish_output:" + " bad unowned skb = %p: ",skb); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } } } } @@ -274,31 +289,42 @@ static int nf_sockopt(struct sock *sk, int pf, int val, char *opt, int *len, int get) { struct list_head *i; + struct nf_sockopt_ops *ops; int ret; if (down_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { - struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { + ops->use++; + up(&nf_sockopt_mutex); ret = ops->get(sk, val, opt, len); goto out; } } else { if (val >= ops->set_optmin && val < ops->set_optmax) { + ops->use++; + up(&nf_sockopt_mutex); ret = ops->set(sk, val, opt, *len); goto out; } } } } - ret = -ENOPROTOOPT; + up(&nf_sockopt_mutex); + return -ENOPROTOOPT; + out: + down(&nf_sockopt_mutex); + ops->use--; + if (ops->cleanup_task) + 
wake_up_process(ops->cleanup_task); up(&nf_sockopt_mutex); return ret; } @@ -334,6 +360,10 @@ static unsigned int nf_iterate(struct list_head *head, case NF_DROP: return NF_DROP; + case NF_REPEAT: + *i = (*i)->prev; + break; + #ifdef CONFIG_NETFILTER_DEBUG case NF_ACCEPT: break; @@ -367,7 +397,6 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) /* The caller must flush their queue before this */ int nf_unregister_queue_handler(int pf) { - NFDEBUG("Unregistering Netfilter queue handler for pf=%d\n", pf); br_write_lock_bh(BR_NETPROTO_LOCK); queue_handler[pf].outfn = NULL; queue_handler[pf].data = NULL; @@ -390,7 +419,6 @@ static void nf_queue(struct sk_buff *skb, struct nf_info *info; if (!queue_handler[pf].outfn) { - NFDEBUG("nf_queue: noone wants the packet, dropping it.\n"); kfree_skb(skb); return; } @@ -432,6 +460,14 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, unsigned int verdict; int ret = 0; +#ifdef CONFIG_NETFILTER_DEBUG + if (skb->nf_debug & (1 << hook)) { + printk("nf_hook: hook %i already set.\n", hook); + nf_dump_skb(pf, skb); + } + skb->nf_debug |= (1 << hook); +#endif + elem = &nf_hooks[pf][hook]; verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, outdev, &elem, okfn); @@ -473,6 +509,11 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, } /* Continue traversal iff userspace said ok... */ + if (verdict == NF_REPEAT) { + elem = elem->prev; + verdict = NF_ACCEPT; + } + if (verdict == NF_ACCEPT) { verdict = nf_iterate(&nf_hooks[info->pf][info->hook], &skb, info->hook, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ecda47d7a470..dad1f3925745 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,7 +4,7 @@ * Authors: Alan Cox * Florian La Roche * - * Version: $Id: skbuff.c,v 1.69 2000/03/06 03:47:58 davem Exp $ + * Version: $Id: skbuff.c,v 1.70 2000/03/17 14:41:39 davem Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. 
@@ -203,7 +203,7 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache, skb->dst = NULL; skb->rx_dev = NULL; #ifdef CONFIG_NETFILTER - skb->nfmark = skb->nfreason = skb->nfcache = 0; + skb->nfmark = skb->nfcache = 0; skb->nfct = NULL; #ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 0; @@ -319,7 +319,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->security=old->security; #ifdef CONFIG_NETFILTER new->nfmark=old->nfmark; - new->nfreason=old->nfreason; new->nfcache=old->nfcache; new->nfct=old->nfct; nf_conntrack_get(new->nfct); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e54a6fa852b..2ba5f2f6c189 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -805,10 +805,7 @@ non_local_input: key.scope = RT_SCOPE_UNIVERSE; #ifdef CONFIG_DECNET_ROUTE_FWMASK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - key.fwmark = skb->fwmark; - else - key.fwmark = 0; + key.fwmark = skb->fwmark; #else key.fwmark = 0; #endif @@ -886,9 +883,7 @@ int dn_route_input(struct sk_buff *skb) (rt->key.daddr == cb->dst) && (rt->key.oif == 0) && #ifdef CONFIG_DECNET_ROUTE_FWMASK - (rt->key.fwmark == (skb->nfreason == - NF_REASON_FOR_ROUTING - ? 
skb->nfmark : 0)) && + (rt->key.fwmark == skb->nfmark) && #endif (rt->key.iif == cb->iif)) { rt->u.dst.lastuse = jiffies; diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in index 32e2aca16762..68fea0272937 100644 --- a/net/ipv4/Config.in +++ b/net/ipv4/Config.in @@ -9,7 +9,7 @@ if [ "$CONFIG_IP_ADVANCED_ROUTER" = "y" ]; then bool ' IP: policy routing' CONFIG_IP_MULTIPLE_TABLES if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then if [ "$CONFIG_NETFILTER" = "y" ]; then - bool ' IP: use FWMARK value as routing key' CONFIG_IP_ROUTE_FWMARK + bool ' IP: use netfilter MARK value as routing key' CONFIG_IP_ROUTE_FWMARK fi bool ' IP: fast network address translation' CONFIG_IP_ROUTE_NAT fi @@ -53,3 +53,6 @@ bool ' IP: Allow large windows (not recommended if <16Mb of memory)' CONFIG_SKB #if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then # bool ' IP: support checksum copy to user for UDP (EXPERIMENTAL)' CONFIG_UDP_DELAY_CSUM #fi +if [ "$CONFIG_NETFILTER" != "n" ]; then + source net/ipv4/netfilter/Config.in +fi diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d7da63f4ef44..7561e190b634 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -3,7 +3,7 @@ * * Alan Cox, * - * Version: $Id: icmp.c,v 1.65 2000/02/22 23:54:25 davem Exp $ + * Version: $Id: icmp.c,v 1.66 2000/03/17 14:41:50 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -331,20 +331,6 @@ struct icmp_control static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; -/* - * Build xmit assembly blocks - */ - -struct icmp_bxm -{ - void *data_ptr; - int data_len; - struct icmphdr icmph; - unsigned long csum; - struct ip_options replyopts; - unsigned char optbuf[40]; -}; - /* * The ICMP socket. 
This is the most convenient way to flow control * our ICMP output as well as maintain a clean interface throughout @@ -508,7 +494,7 @@ static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned * Driving logic for building and sending ICMP messages. */ -static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) +void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct sock *sk=icmp_socket->sk; struct ipcm_cookie ipc; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8d651b0422b5..01a39b6e4102 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -599,6 +599,10 @@ int ipgre_rcv(struct sk_buff *skb, unsigned short len) skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); read_unlock(&ipgre_lock); return(0); @@ -818,6 +822,11 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ip_select_ident(iph, &rt->u.dst); ip_send_check(iph); +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif + stats->tx_bytes += skb->len; stats->tx_packets++; ip_send(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 26d025d3280d..f3013ca573f3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.81 2000/03/06 03:48:01 davem Exp $ + * Version: $Id: ip_output.c,v 1.82 2000/03/17 14:41:50 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -894,6 +894,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) /* Connection association is same as pre-frag packet */ skb2->nfct = skb->nfct; nf_conntrack_get(skb2->nfct); +#ifdef CONFIG_NETFILTER_DEBUG + skb2->nf_debug = skb->nf_debug; +#endif #endif /* diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 5518ec1cb283..25fec33fb692 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,7 +1,7 @@ /* * Linux NET3: IP/IP protocol decoder. * - * Version: $Id: ipip.c,v 1.30 2000/01/06 00:41:55 davem Exp $ + * Version: $Id: ipip.c,v 1.31 2000/03/17 14:41:51 davem Exp $ * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 @@ -483,6 +483,10 @@ int ipip_rcv(struct sk_buff *skb, unsigned short len) skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); read_unlock(&ipip_lock); return 0; @@ -619,6 +623,11 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ip_select_ident(iph, &rt->u.dst); ip_send_check(iph); +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif + stats->tx_bytes += skb->len; stats->tx_packets++; ip_send(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fce5a43f8ba5..1e33ec4ca1a2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.50 2000/01/09 02:19:32 davem Exp $ + * Version: $Id: ipmr.c,v 1.51 2000/03/17 14:41:52 davem Exp $ * * Fixes: * Michael Chastain : Incorrect size of copying. 
@@ -1100,6 +1100,10 @@ static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) skb->h.ipiph = skb->nh.iph; skb->nh.iph = iph; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif } static inline int ipmr_forward_finish(struct sk_buff *skb) @@ -1433,6 +1437,10 @@ int pim_rcv_v1(struct sk_buff * skb, unsigned short len) skb->dst = NULL; ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; ((struct net_device_stats*)reg_dev->priv)->rx_packets++; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); dev_put(reg_dev); return 0; @@ -1488,6 +1496,10 @@ int pim_rcv(struct sk_buff * skb, unsigned short len) ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; ((struct net_device_stats*)reg_dev->priv)->rx_packets++; skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); dev_put(reg_dev); return 0; diff --git a/net/ipv4/netfilter/Config.in b/net/ipv4/netfilter/Config.in new file mode 100644 index 000000000000..bf2a2826911b --- /dev/null +++ b/net/ipv4/netfilter/Config.in @@ -0,0 +1,64 @@ +# +# IP netfilter configuration +# +mainmenu_option next_comment +comment ' IP: Netfilter Configuration' + +tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP_NF_CONNTRACK +if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then + dep_tristate ' FTP protocol support' CONFIG_IP_NF_FTP $CONFIG_IP_NF_CONNTRACK +fi + +if [ "$CONFIG_EXPERIMENTAL" = "y" -a "$CONFIG_NETLINK" = "y" ]; then + tristate 'Userspace queueing via NETLINK (EXPERIMENTAL)' CONFIG_IP_NF_QUEUE +fi +tristate 'IP tables support (required for filtering/masq/NAT)' CONFIG_IP_NF_IPTABLES +if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then +# The simple matches. 
+ dep_tristate ' limit match support' CONFIG_IP_NF_MATCH_LIMIT $CONFIG_IP_NF_IPTABLES + dep_tristate ' MAC address match support' CONFIG_IP_NF_MATCH_MAC $CONFIG_IP_NF_IPTABLES + dep_tristate ' netfilter MARK match support' CONFIG_IP_NF_MATCH_MARK $CONFIG_IP_NF_IPTABLES + dep_tristate ' Multiple port match support' CONFIG_IP_NF_MATCH_MULTIPORT $CONFIG_IP_NF_IPTABLES + dep_tristate ' TOS match support' CONFIG_IP_NF_MATCH_TOS $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then + dep_tristate ' Connection state match support' CONFIG_IP_NF_MATCH_STATE $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES + fi + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES + dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES + fi +# The targets + dep_tristate ' Packet filtering' CONFIG_IP_NF_FILTER $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_FILTER" != "n" ]; then + dep_tristate ' REJECT target support' CONFIG_IP_NF_TARGET_REJECT $CONFIG_IP_NF_FILTER + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP_NF_TARGET_MIRROR $CONFIG_IP_NF_FILTER + fi + fi + + if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then + dep_tristate ' Full NAT' CONFIG_IP_NF_NAT $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_NAT" != "n" ]; then + dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT + dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT + fi + fi + + dep_tristate ' Packet mangling' CONFIG_IP_NF_MANGLE $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_MANGLE" != "n" ]; then + dep_tristate ' TOS target support' CONFIG_IP_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE + dep_tristate ' MARK target support' CONFIG_IP_NF_TARGET_MARK $CONFIG_IP_NF_MANGLE + fi + dep_tristate ' LOG target support' CONFIG_IP_NF_TARGET_LOG $CONFIG_IP_NF_IPTABLES +fi + 
+# Backwards compatibility modules: only if you don't build in the others. +if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then + if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then + tristate 'ipchains (2.2-style) support' CONFIG_IP_NF_COMPAT_IPCHAINS + if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "y" ]; then + tristate 'ipfwadm (2.0-style) support' CONFIG_IP_NF_COMPAT_IPFWADM + fi + fi +fi +endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile new file mode 100644 index 000000000000..41a61e010b8c --- /dev/null +++ b/net/ipv4/netfilter/Makefile @@ -0,0 +1,234 @@ +# +# Makefile for the netfilter modules on top of IPv4. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := netfilter.o +MOD_LIST_NAME := IPV4_MODULES +M_OBJS := + +IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o + +IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o + +# Link order matters here. 
+ifeq ($(CONFIG_IP_NF_CONNTRACK),y) +OX_OBJS += ip_conntrack_standalone.o +O_OBJS += $(IP_NF_CONNTRACK_OBJ) +else + ifeq ($(CONFIG_IP_NF_CONNTRACK),m) + M_OBJS += ip_conntrack.o + endif +endif + +ifeq ($(CONFIG_IP_NF_QUEUE),y) +O_OBJS += ip_queue.o +else + ifeq ($(CONFIG_IP_NF_QUEUE),m) + M_OBJS += ip_queue.o + endif +endif + +ifeq ($(CONFIG_IP_NF_FTP),y) +OX_OBJS += ip_conntrack_ftp.o +else + ifeq ($(CONFIG_IP_NF_FTP),m) + M_OBJS += ip_conntrack_ftp.o + endif +endif + +ifeq ($(CONFIG_IP_NF_IPTABLES),y) +O_OBJS += ip_tables.o +else + ifeq ($(CONFIG_IP_NF_IPTABLES),m) + M_OBJS += ip_tables.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),y) +O_OBJS += ipt_limit.o +else + ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),m) + M_OBJS += ipt_limit.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_MARK),y) +O_OBJS += ipt_mark.o +else + ifeq ($(CONFIG_IP_NF_MATCH_MARK),m) + M_OBJS += ipt_mark.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_MAC),y) +O_OBJS += ipt_mac.o +else + ifeq ($(CONFIG_IP_NF_MATCH_MAC),m) + M_OBJS += ipt_mac.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),y) +O_OBJS += ipt_multiport.o +else + ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),m) + M_OBJS += ipt_multiport.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_OWNER),y) +O_OBJS += ipt_owner.o +else + ifeq ($(CONFIG_IP_NF_MATCH_OWNER),m) + M_OBJS += ipt_owner.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_TOS),y) +O_OBJS += ipt_tos.o +else + ifeq ($(CONFIG_IP_NF_MATCH_TOS),m) + M_OBJS += ipt_tos.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_STATE),y) +O_OBJS += ipt_state.o +else + ifeq ($(CONFIG_IP_NF_MATCH_STATE),m) + M_OBJS += ipt_state.o + endif +endif + +ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),y) +O_OBJS += ipt_unclean.o +else + ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),m) + M_OBJS += ipt_unclean.o + endif +endif + +ifeq ($(CONFIG_IP_NF_FILTER),y) +O_OBJS += iptable_filter.o +else + ifeq ($(CONFIG_IP_NF_FILTER),m) + M_OBJS += iptable_filter.o + endif +endif + +ifeq ($(CONFIG_IP_NF_NAT),y) +OX_OBJS += 
ip_nat_standalone.o +O_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ) + ifeq ($(CONFIG_IP_NF_FTP),y) + O_OBJS += ip_nat_ftp.o + endif +else + ifeq ($(CONFIG_IP_NF_NAT),m) + M_OBJS += iptable_nat.o + ifeq ($(CONFIG_IP_NF_FTP),m) + M_OBJS += ip_nat_ftp.o + endif + endif +endif + +ifeq ($(CONFIG_IP_NF_MANGLE),y) +O_OBJS += iptable_mangle.o +else + ifeq ($(CONFIG_IP_NF_MANGLE),m) + M_OBJS += iptable_mangle.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_REJECT),y) +O_OBJS += ipt_REJECT.o +else + ifeq ($(CONFIG_IP_NF_TARGET_REJECT),m) + M_OBJS += ipt_REJECT.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),y) +O_OBJS += ipt_MIRROR.o +else + ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),m) + M_OBJS += ipt_MIRROR.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_TOS),y) +O_OBJS += ipt_TOS.o +else + ifeq ($(CONFIG_IP_NF_TARGET_TOS),m) + M_OBJS += ipt_TOS.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_MARK),y) +O_OBJS += ipt_MARK.o +else + ifeq ($(CONFIG_IP_NF_TARGET_MARK),m) + M_OBJS += ipt_MARK.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),y) +O_OBJS += ipt_MASQUERADE.o +else + ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),m) + M_OBJS += ipt_MASQUERADE.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),y) +O_OBJS += ipt_REDIRECT.o +else + ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),m) + M_OBJS += ipt_REDIRECT.o + endif +endif + +ifeq ($(CONFIG_IP_NF_TARGET_LOG),y) +O_OBJS += ipt_LOG.o +else + ifeq ($(CONFIG_IP_NF_TARGET_LOG),m) + M_OBJS += ipt_LOG.o + endif +endif + +ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y) +O_OBJS += ipchains.o +else + ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m) + M_OBJS += ipchains.o + endif +endif + +ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y) +O_OBJS += ipfwadm.o +else + ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m) + M_OBJS += ipfwadm.o + endif +endif + +include $(TOPDIR)/Rules.make + +ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ) + $(LD) -r -o $@ $(IP_NF_CONNTRACK_OBJ) ip_conntrack_standalone.o + +iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o 
$(IP_NF_NAT_OBJ) + $(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) + +# All the parts of conntrack and NAT required for compatibility layer. +IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ) + +ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER) + $(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER) + +ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER) + $(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c new file mode 100644 index 000000000000..9007cdc89a95 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -0,0 +1,891 @@ +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ + +#ifdef MODULE +#define __NO_VERSION__ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This rwlock protects the main hash table, protocol/helper/expected + registrations, conntrack timers*/ +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +DECLARE_RWLOCK(ip_conntrack_lock); + +void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; +static LIST_HEAD(expect_list); +static LIST_HEAD(protocol_list); +static LIST_HEAD(helpers); +unsigned int ip_conntrack_htable_size = 0; +static int ip_conntrack_max = 0; +static atomic_t ip_conntrack_count = ATOMIC_INIT(0); +struct list_head *ip_conntrack_hash; + +extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; + +static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr, + u_int8_t protocol) +{ + return protocol == curr->proto; +} + +struct ip_conntrack_protocol *__find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + p = LIST_FIND(&protocol_list, proto_cmpfn, + struct ip_conntrack_protocol *, protocol); + if (!p) + p = &ip_conntrack_generic_protocol; + + return p; +} + +struct ip_conntrack_protocol *find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + READ_LOCK(&ip_conntrack_lock); + p = __find_proto(protocol); + READ_UNLOCK(&ip_conntrack_lock); + return p; +} + +static inline void ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + IP_NF_ASSERT(ct->infos[0].master); + /* nf_conntrack_put wants to go via an info struct, so feed it + one at random. */ + nf_conntrack_put(&ct->infos[0]); +} + +static inline u_int32_t +hash_conntrack(const struct ip_conntrack_tuple *tuple) +{ +#if 0 + dump_tuple(tuple); +#endif +#ifdef CONFIG_NETFILTER_DEBUG + if (tuple->src.pad) + DEBUGP("Tuple %p has non-zero padding.\n", tuple); +#endif + /* ntohl because more differences in low bits. */ + /* To ensure that halves of the same connection don't hash + clash, we add the source per-proto again. 
*/ + return (ntohl(tuple->src.ip + tuple->dst.ip + + tuple->src.u.all + tuple->dst.u.all + + tuple->dst.protonum) + + ntohs(tuple->src.u.all)) + % ip_conntrack_htable_size; +} + +inline int +get_tuple(const struct iphdr *iph, size_t len, + struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol) +{ + int ret; + + /* Can only happen when extracting tuples from inside ICMP + packets */ + if (iph->frag_off & htons(IP_OFFSET)) { + if (net_ratelimit()) + printk("ip_conntrack_core: Frag of proto %u.\n", + iph->protocol); + return 0; + } + /* Guarantee 8 protocol bytes: if more wanted, use len param */ + else if (iph->ihl * 4 + 8 > len) + return 0; + + tuple->src.ip = iph->saddr; + tuple->src.pad = 0; + tuple->dst.ip = iph->daddr; + tuple->dst.protonum = iph->protocol; + + ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl, + len - 4*iph->ihl, + tuple); + return ret; +} + +static int +invert_tuple(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig, + const struct ip_conntrack_protocol *protocol) +{ + inverse->src.ip = orig->dst.ip; + inverse->src.pad = 0; + inverse->dst.ip = orig->src.ip; + inverse->dst.protonum = orig->dst.protonum; + + return protocol->invert_tuple(inverse, orig); +} + +static void +destroy_conntrack(struct nf_conntrack *nfct) +{ + struct ip_conntrack *ct = (struct ip_conntrack *)nfct; + + IP_NF_ASSERT(atomic_read(&nfct->use) == 0); + IP_NF_ASSERT(!timer_pending(&ct->timeout)); + + if (ct->master.master) + nf_conntrack_put(&ct->master); + + if (ip_conntrack_destroyed) + ip_conntrack_destroyed(ct); + kfree(ct); + atomic_dec(&ip_conntrack_count); +} + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct ip_conntrack *ct = (void *)ul_conntrack; + + WRITE_LOCK(&ip_conntrack_lock); + /* Remove from both hash lists */ + LIST_DELETE(&ip_conntrack_hash + [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)], + &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&ip_conntrack_hash + 
[hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)], + &ct->tuplehash[IP_CT_DIR_REPLY]); + /* If our expected is in the list, take it out. */ + if (ct->expected.expectant) { + IP_NF_ASSERT(list_inlist(&expect_list, &ct->expected)); + IP_NF_ASSERT(ct->expected.expectant == ct); + LIST_DELETE(&expect_list, &ct->expected); + } + WRITE_UNLOCK(&ip_conntrack_lock); + ip_conntrack_put(ct); +} + +static inline int +conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + return i->ctrack != ignored_conntrack + && memcmp(tuple, &i->tuple, sizeof(*tuple)) == 0; +} + +static struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + h = LIST_FIND(&ip_conntrack_hash[hash_conntrack(tuple)], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + tuple, ignored_conntrack); + return h; +} + +/* Find a connection corresponding to a tuple. */ +struct ip_conntrack_tuple_hash * +ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +/* Returns true if a connection correspondings to the tuple (required + for NAT). 
*/ +int +ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + READ_UNLOCK(&ip_conntrack_lock); + + return h != NULL; +} + +/* Returns TRUE if it dealt with ICMP, and filled in skb fields */ +int icmp_error_track(struct sk_buff *skb) +{ + const struct iphdr *iph = skb->nh.iph; + struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + struct ip_conntrack_tuple innertuple, origtuple; + struct iphdr *inner = (struct iphdr *)(hdr + 1); + size_t datalen = skb->len - iph->ihl*4 - sizeof(*hdr); + struct ip_conntrack_protocol *innerproto; + struct ip_conntrack_tuple_hash *h; + enum ip_conntrack_info ctinfo; + + if (iph->protocol != IPPROTO_ICMP) + return 0; + + if (skb->len < iph->ihl * 4 + sizeof(struct icmphdr)) { + DEBUGP("icmp_error_track: too short\n"); + return 1; + } + + if (hdr->type != ICMP_DEST_UNREACH + && hdr->type != ICMP_SOURCE_QUENCH + && hdr->type != ICMP_TIME_EXCEEDED + && hdr->type != ICMP_PARAMETERPROB + && hdr->type != ICMP_REDIRECT) + return 0; + + /* Ignore it if the checksum's bogus. */ + if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) { + DEBUGP("icmp_error_track: bad csum\n"); + return 1; + } + + innerproto = find_proto(inner->protocol); + /* Are they talking about one of our connections? */ + if (inner->ihl * 4 + 8 > datalen + || !get_tuple(inner, datalen, &origtuple, innerproto)) { + DEBUGP("icmp_error: ! get_tuple p=%u (%u*4+%u dlen=%u)\n", + inner->protocol, inner->ihl, 8, + datalen); + return 1; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. 
*/ + if (!invert_tuple(&innertuple, &origtuple, innerproto)) { + DEBUGP("icmp_error_track: Can't invert tuple\n"); + return 1; + } + h = ip_conntrack_find_get(&innertuple, NULL); + if (!h) { + DEBUGP("icmp_error_track: no match\n"); + return 1; + } + + ctinfo = IP_CT_RELATED; + if (DIRECTION(h) == IP_CT_DIR_REPLY) + ctinfo += IP_CT_IS_REPLY; + + /* Update skb to refer to this connection */ + skb->nfct = &h->ctrack->infos[ctinfo]; + return 1; +} + +static inline int helper_cmp(const struct ip_conntrack_helper *i, + const struct ip_conntrack_tuple *rtuple) +{ + return i->will_help(rtuple); +} + +/* Compare all but src per-proto part. */ +static int expect_cmp(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple) +{ + return (tuple->src.ip == i->tuple.src.ip + && tuple->dst.ip == i->tuple.dst.ip + && tuple->dst.u.all == i->tuple.dst.u.all + && tuple->dst.protonum == i->tuple.dst.protonum); +} + +/* Allocate a new conntrack; we set everything up, then grab write + lock and see if we lost a race. If we lost it we return 0, + indicating the controlling code should look again. 
*/ +static int +init_conntrack(const struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol, + struct sk_buff *skb) +{ + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + size_t hash, repl_hash; + struct ip_conntrack_expect *expected; + enum ip_conntrack_info ctinfo; + int i; + + if (!invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return 1; + } + + if(ip_conntrack_max && + (atomic_read(&ip_conntrack_count) >= ip_conntrack_max)) { + if (net_ratelimit()) + printk(KERN_WARNING "ip_conntrack: maximum limit of %d entries exceeded\n", ip_conntrack_max); + return 1; + } + + conntrack = kmalloc(sizeof(struct ip_conntrack), GFP_ATOMIC); + if (!conntrack) { + DEBUGP("Can't allocate conntrack.\n"); + return 1; + } + hash = hash_conntrack(tuple); + repl_hash = hash_conntrack(&repl_tuple); + + memset(conntrack, 0, sizeof(struct ip_conntrack)); + atomic_set(&conntrack->ct_general.use, 1); + conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; + conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; + for(i=0; i < IP_CT_NUMBER; i++) + conntrack->infos[i].master = &conntrack->ct_general; + + if (!protocol->new(conntrack, skb->nh.iph, skb->len)) { + kfree(conntrack); + return 1; + } + + /* Sew in at head of hash list. */ + WRITE_LOCK(&ip_conntrack_lock); + /* Check noone else beat us in the race... 
*/ + if (__ip_conntrack_find(tuple, NULL)) { + WRITE_UNLOCK(&ip_conntrack_lock); + printk("ip_conntrack: Wow someone raced us!\n"); + kfree(conntrack); + return 0; + } + conntrack->helper = LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + &repl_tuple); + /* Need finding and deleting of expected ONLY if we win race */ + expected = LIST_FIND(&expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); + if (expected) { + /* Welcome, Mr. Bond. We've been expecting you... */ + conntrack->status = IPS_EXPECTED; + conntrack->master.master = &expected->expectant->ct_general; + IP_NF_ASSERT(conntrack->master.master); + LIST_DELETE(&expect_list, expected); + expected->expectant = NULL; + nf_conntrack_get(&conntrack->master); + ctinfo = IP_CT_RELATED; + } else { + ctinfo = IP_CT_NEW; + } + list_prepend(&ip_conntrack_hash[hash], + &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]); + list_prepend(&ip_conntrack_hash[repl_hash], + &conntrack->tuplehash[IP_CT_DIR_REPLY]); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Update skb to refer to this connection */ + skb->nfct = &conntrack->infos[ctinfo]; + + atomic_inc(&ip_conntrack_count); + return 1; +} + +static void +resolve_normal_ct(struct sk_buff *skb) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_protocol *proto; + enum ip_conntrack_info ctinfo; + + proto = find_proto(skb->nh.iph->protocol); + if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto)) + return; + + /* Loop around search/insert race */ + do { + /* look for tuple match */ + h = ip_conntrack_find_get(&tuple, NULL); + if (!h && init_conntrack(&tuple, proto, skb)) + return; + } while (!h); + + /* It exists; we have (non-exclusive) reference. */ + if (DIRECTION(h) == IP_CT_DIR_REPLY) { + ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + h->ctrack->status |= IPS_SEEN_REPLY; + } else { + /* Once we've had two way comms, always ESTABLISHED. 
*/ + if (h->ctrack->status & IPS_SEEN_REPLY) { + DEBUGP("ip_conntrack_in: normal packet for %p\n", + h->ctrack); + ctinfo = IP_CT_ESTABLISHED; + } else if (h->ctrack->status & IPS_EXPECTED) { + DEBUGP("ip_conntrack_in: related packet for %p\n", + h->ctrack); + ctinfo = IP_CT_RELATED; + } else { + DEBUGP("ip_conntrack_in: new packet for %p\n", + h->ctrack); + ctinfo = IP_CT_NEW; + } + } + skb->nfct = &h->ctrack->infos[ctinfo]; +} + +/* Return conntrack and conntrack_info a given skb */ +struct ip_conntrack * +ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) +{ + if (!skb->nfct) { + /* It may be an icmp error... */ + if (!icmp_error_track(skb)) + resolve_normal_ct(skb); + } + + if (skb->nfct) { + struct ip_conntrack *ct + = (struct ip_conntrack *)skb->nfct->master; + + /* ctinfo is the index of the nfct inside the conntrack */ + *ctinfo = skb->nfct - ct->infos; + IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER); + return ct; + } + return NULL; +} + +/* Netfilter hook itself. */ +unsigned int ip_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_conntrack_protocol *proto; + int ret; + + /* FIXME: Do this right please. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN; + + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + /* Gather fragments. 
*/ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + if (!*pskb) + return NF_STOLEN; + } + + ct = ip_conntrack_get(*pskb, &ctinfo); + if (!ct) + /* Not valid part of a connection */ + return NF_ACCEPT; + + proto = find_proto((*pskb)->nh.iph->protocol); + /* If this is new, this is first time timer will be set */ + ret = proto->packet(ct, (*pskb)->nh.iph, (*pskb)->len, ctinfo); + + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + + if (ret != NF_DROP && ct->helper) { + ret = ct->helper->help((*pskb)->nh.iph, (*pskb)->len, + ct, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + } + + return ret; +} + +int invert_tuplepr(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig) +{ + return invert_tuple(inverse, orig, find_proto(orig->dst.protonum)); +} + +/* Add a related connection. */ +int ip_conntrack_expect_related(struct ip_conntrack *related_to, + const struct ip_conntrack_tuple *tuple) +{ + WRITE_LOCK(&ip_conntrack_lock); + related_to->expected.tuple = *tuple; + + if (!related_to->expected.expectant) { + list_prepend(&expect_list, &related_to->expected); + related_to->expected.expectant = related_to; + } else { + IP_NF_ASSERT(list_inlist(&expect_list, &related_to->expected)); + IP_NF_ASSERT(related_to->expected.expectant + == related_to); + } + WRITE_UNLOCK(&ip_conntrack_lock); + + return 0; +} + +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. 
*/ +int ip_conntrack_alter_reply(struct ip_conntrack *conntrack, + const struct ip_conntrack_tuple *newreply) +{ + unsigned int newindex = hash_conntrack(newreply); + + WRITE_LOCK(&ip_conntrack_lock); + if (__ip_conntrack_find(newreply, conntrack)) { + WRITE_UNLOCK(&ip_conntrack_lock); + return 0; + } + DEBUGP("Altering reply tuple of %p to ", conntrack); + DUMP_TUPLE(newreply); + + LIST_DELETE(&ip_conntrack_hash + [hash_conntrack(&conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple)], + &conntrack->tuplehash[IP_CT_DIR_REPLY]); + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; + list_prepend(&ip_conntrack_hash[newindex], + &conntrack->tuplehash[IP_CT_DIR_REPLY]); + conntrack->helper = LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + newreply); + WRITE_UNLOCK(&ip_conntrack_lock); + return 1; +} + +int ip_conntrack_helper_register(struct ip_conntrack_helper *me) +{ + MOD_INC_USE_COUNT; + + WRITE_LOCK(&ip_conntrack_lock); + list_prepend(&helpers, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + return 0; +} + +static inline int unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) +{ + if (i->ctrack->helper == me) { + i->ctrack->helper = NULL; + /* Get rid of any expected. */ + if (i->ctrack->expected.expectant) { + IP_NF_ASSERT(i->ctrack->expected.expectant + == i->ctrack); + LIST_DELETE(&expect_list, &i->ctrack->expected); + i->ctrack->expected.expectant = NULL; + } + } + return 0; +} + +void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) +{ + unsigned int i; + + /* Need write lock here, to delete helper. */ + WRITE_LOCK(&ip_conntrack_lock); + LIST_DELETE(&helpers, me); + + /* Get rid of expecteds, set helpers to NULL. */ + for (i = 0; i < ip_conntrack_htable_size; i++) + LIST_FIND_W(&ip_conntrack_hash[i], unhelp, + struct ip_conntrack_tuple_hash *, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. 
*/ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + MOD_DEC_USE_COUNT; +} + +/* Refresh conntrack for this many jiffies: if noone calls this, + conntrack will vanish with current skb. */ +void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies) +{ + WRITE_LOCK(&ip_conntrack_lock); + /* If this hasn't had a timer before, it's still being set up */ + if (ct->timeout.data == 0) { + ct->timeout.data = (unsigned long)ct; + ct->timeout.function = death_by_timeout; + ct->timeout.expires = jiffies + extra_jiffies; + atomic_inc(&ct->ct_general.use); + add_timer(&ct->timeout); + } else { + /* Need del_timer for race avoidance (may already be dying). */ + if (del_timer(&ct->timeout)) { + ct->timeout.expires = jiffies + extra_jiffies; + add_timer(&ct->timeout); + } + } + WRITE_UNLOCK(&ip_conntrack_lock); +} + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +ip_ct_gather_frags(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int olddebug = skb->nf_debug; +#endif + if (sk) sock_hold(sk); + skb = ip_defrag(skb); + if (!skb) { + if (sk) sock_put(sk); + return skb; + } + if (sk) { + skb_set_owner_w(skb, sk); + sock_put(sk); + } + + ip_send_check(skb->nh.iph); + skb->nfcache |= NFC_ALTERED; +#ifdef CONFIG_NETFILTER_DEBUG + /* Packet path as if nothing had happened. */ + skb->nf_debug = olddebug; +#endif + return skb; +} + +static inline int +do_kill(const struct ip_conntrack_tuple_hash *i, + int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + return kill(i->ctrack, data); +} + +/* Bring out ya dead! 
*/ +static struct ip_conntrack_tuple_hash * +get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + struct ip_conntrack_tuple_hash *h = NULL; + unsigned int i; + + READ_LOCK(&ip_conntrack_lock); + for (i = 0; !h && i < ip_conntrack_htable_size; i++) { + h = LIST_FIND(&ip_conntrack_hash[i], do_kill, + struct ip_conntrack_tuple_hash *, kill, data); + } + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +void +ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + struct ip_conntrack_tuple_hash *h; + + /* This is order n^2, by the way. */ + while ((h = get_next_corpse(kill, data)) != NULL) { + /* Time to push up daises... */ + if (del_timer(&h->ctrack->timeout)) + death_by_timeout((unsigned long)h->ctrack); + /* ... else the timer will get him soon. */ + + ip_conntrack_put(h->ctrack); + } +} + +/* Fast function for those who don't want to parse /proc (and I don't + blame them). */ +/* Reversing the socket's dst/src point of view gives us the reply + mapping. */ +static int +getorigdst(struct sock *sk, int optval, void *user, int *len) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport }, + 0 }, + { sk->daddr, { sk->dport }, + IPPROTO_TCP } }; + + /* We only do TCP at the moment: is there a better way? 
*/ + if (strcmp(sk->prot->name, "TCP") != 0) { + DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + return -ENOPROTOOPT; + } + + if (*len != sizeof(struct sockaddr_in)) { + DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", + *len, sizeof(struct sockaddr_in)); + return -EINVAL; + } + + h = ip_conntrack_find_get(&tuple, NULL); + if (h) { + struct sockaddr_in sin; + + sin.sin_family = AF_INET; + sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.ip; + + DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + IP_PARTS(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + ip_conntrack_put(h->ctrack); + if (copy_to_user(user, &sin, sizeof(sin)) != 0) + return -EFAULT; + else + return 0; + } + DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", + IP_PARTS(tuple.src.ip), ntohs(tuple.src.u.tcp.port), + IP_PARTS(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} + +static struct nf_sockopt_ops so_getorigdst += { { NULL, NULL }, PF_INET, + 0, 0, NULL, /* Setsockopts */ + SO_ORIGINAL_DST, SO_ORIGINAL_DST+1, &getorigdst, + 0, NULL }; + +#define NET_IP_CONNTRACK_MAX 2089 +#define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max" + +static struct ctl_table_header *ip_conntrack_sysctl_header; + +static ctl_table ip_conntrack_table[] = { + { NET_IP_CONNTRACK_MAX, NET_IP_CONNTRACK_MAX_NAME, &ip_conntrack_max, + sizeof(ip_conntrack_max), 0644, NULL, proc_dointvec }, + { 0 } +}; + +static ctl_table ip_conntrack_dir_table[] = { + {NET_IPV4, "ipv4", NULL, 0, 0555, ip_conntrack_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +static ctl_table ip_conntrack_root_table[] = { + {CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +static int kill_all(const struct ip_conntrack *i, void *data) +{ + return 1; +} + +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. 
*/ +void ip_conntrack_cleanup(void) +{ + unregister_sysctl_table(ip_conntrack_sysctl_header); + ip_ct_selective_cleanup(kill_all, NULL); + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); +} + +int __init ip_conntrack_init(void) +{ + unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB + * machine has 256 buckets. 1GB machine has 8192 buckets. */ + ip_conntrack_htable_size + = (((num_physpages << PAGE_SHIFT) / 16384) + / sizeof(struct list_head)); + ip_conntrack_max = 8 * ip_conntrack_htable_size; + + printk("ip_conntrack (%u buckets, %d max)\n", + ip_conntrack_htable_size, ip_conntrack_max); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret != 0) + return ret; + + ip_conntrack_hash = vmalloc(sizeof(struct list_head) + * ip_conntrack_htable_size); + if (!ip_conntrack_hash) { + nf_unregister_sockopt(&so_getorigdst); + return -ENOMEM; + } + + /* Don't NEED lock here, but good form anyway. */ + WRITE_LOCK(&ip_conntrack_lock); + /* Sew in builtin protocols. */ + list_append(&protocol_list, &ip_conntrack_protocol_tcp); + list_append(&protocol_list, &ip_conntrack_protocol_udp); + list_append(&protocol_list, &ip_conntrack_protocol_icmp); + WRITE_UNLOCK(&ip_conntrack_lock); + + for (i = 0; i < ip_conntrack_htable_size; i++) + INIT_LIST_HEAD(&ip_conntrack_hash[i]); + +/* This is fucking braindead. There is NO WAY of doing this without + the CONFIG_SYSCTL unless you don't want to detect errors. + Grrr... 
--RR */ +#ifdef CONFIG_SYSCTL + ip_conntrack_sysctl_header + = register_sysctl_table(ip_conntrack_root_table, 0); + if (ip_conntrack_sysctl_header == NULL) { + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); + return -ENOMEM; + } +#endif /*CONFIG_SYSCTL*/ + + ret = ip_conntrack_protocol_tcp_init(); + if (ret != 0) { + unregister_sysctl_table(ip_conntrack_sysctl_header); + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); + } + + return ret; +} + diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c new file mode 100644 index 000000000000..9137d13ead5e --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -0,0 +1,251 @@ +/* FTP extension for IP connection tracking. */ +#ifdef MODULE +#define EXPORT_SYMTAB +#endif +#include +#include +#include +#include +#include + +#include +#include +#include + +DECLARE_LOCK(ip_ftp_lock); + +#define SERVER_STRING "227 Entering Passive Mode (" +#define CLIENT_STRING "PORT " + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define IP_PARTS_NATIVE(n) \ +(unsigned int)((n)>>24)&0xFF, \ +(unsigned int)((n)>>16)&0xFF, \ +(unsigned int)((n)>>8)&0xFF, \ +(unsigned int)((n)&0xFF) + +#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n)) + +static struct { + const char *pattern; + size_t plen; + char term; +} search[2] = { + [IP_CT_FTP_PORT] { CLIENT_STRING, sizeof(CLIENT_STRING) - 1, '\r' }, + [IP_CT_FTP_PASV] { SERVER_STRING, sizeof(SERVER_STRING) - 1, ')' } +}; + +/* Returns 0, or length of numbers */ +static int try_number(const char *data, size_t dlen, u_int32_t array[6], + char term) +{ + u_int32_t i, len; + + /* Keep data pointing at next char. */ + for (i = 0, len = 0; len < dlen; len++, data++) { + if (*data >= '0' && *data <= '9') { + array[i] = array[i]*10 + *data - '0'; + } + else if (*data == ',') + i++; + else { + /* Unexpected character; true if it's the + terminator and we're finished. 
*/ + if (*data == term && i == 5) + return len; + + DEBUGP("Char %u (got %u nums) `%u' unexpected\n", + len, i, *data); + return 0; + } + } + + return 0; +} + +/* Return 1 for match, 0 for accept, -1 for partial. */ +static int find_pattern(const char *data, size_t dlen, + const char *pattern, size_t plen, + char term, + unsigned int *numoff, + unsigned int *numlen, + u_int32_t array[6]) +{ + if (dlen == 0) + return 0; + + if (dlen < plen) { + /* Short packet: try for partial? */ + if (strnicmp(data, pattern, dlen) == 0) + return -1; + else return 0; + } + + if (strnicmp(data, pattern, plen) != 0) { +#if 0 + size_t i; + + DEBUGP("ftp: string mismatch\n"); + for (i = 0; i < plen; i++) { + DEBUGFTP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", + i, data[i], data[i], + pattern[i], pattern[i]); + } +#endif + return 0; + } + + *numoff = plen; + *numlen = try_number(data + plen, dlen - plen, array, term); + if (!*numlen) + return -1; + + return 1; +} + +/* FIXME: This should be in userspace. Later. */ +static int help(const struct iphdr *iph, size_t len, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + /* tcplen not negative guarenteed by ip_conntrack_tcp.c */ + struct tcphdr *tcph = (void *)iph + iph->ihl * 4; + const char *data = (const char *)tcph + tcph->doff * 4; + unsigned int tcplen = len - iph->ihl * 4; + unsigned int datalen = tcplen - tcph->doff * 4; + u_int32_t old_seq_aft_nl; + int old_seq_aft_nl_set; + u_int32_t array[6] = { 0 }; + int dir = CTINFO2DIR(ctinfo); + unsigned int matchlen, matchoff; + struct ip_conntrack_tuple t; + struct ip_ct_ftp *info = &ct->help.ct_ftp_info; + + /* Can't track connections formed before we registered */ + if (!info) + return NF_ACCEPT; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); + return NF_ACCEPT; + } + + /* Not whole TCP header? 
*/ + if (tcplen < sizeof(struct tcphdr) || tcplen < tcph->doff*4) { + DEBUGP("ftp: tcplen = %u\n", (unsigned)tcplen); + return NF_ACCEPT; + } + + /* Checksum invalid? Ignore. */ + /* FIXME: Source route IP option packets --RR */ + if (tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr, + csum_partial((char *)tcph, tcplen, 0))) { + DEBUGP("ftp_help: bad csum: %p %u %u.%u.%u.%u %u.%u.%u.%u\n", + tcph, tcplen, IP_PARTS(iph->saddr), + IP_PARTS(iph->daddr)); + return NF_ACCEPT; + } + + LOCK_BH(&ip_ftp_lock); + old_seq_aft_nl_set = info->seq_aft_nl_set[dir]; + old_seq_aft_nl = info->seq_aft_nl[dir]; + + DEBUGP("conntrack_ftp: datalen %u\n", datalen); + if ((datalen > 0) && (data[datalen-1] == '\n')) { + DEBUGP("conntrack_ftp: datalen %u ends in \\n\n", datalen); + if (!old_seq_aft_nl_set + || after(ntohl(tcph->seq) + datalen, old_seq_aft_nl)) { + DEBUGP("conntrack_ftp: updating nl to %u\n", + ntohl(tcph->seq) + datalen); + info->seq_aft_nl[dir] = ntohl(tcph->seq) + datalen; + info->seq_aft_nl_set[dir] = 1; + } + } + UNLOCK_BH(&ip_ftp_lock); + + if(!old_seq_aft_nl_set || + (ntohl(tcph->seq) != old_seq_aft_nl)) { + DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u)\n", + old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl); + return NF_ACCEPT; + } + + switch (find_pattern(data, datalen, + search[dir].pattern, + search[dir].plen, search[dir].term, + &matchoff, &matchlen, + array)) { + case -1: /* partial */ + /* We don't usually drop packets. After all, this is + connection tracking, not packet filtering. + However, it is neccessary for accurate tracking in + this case. 
*/ + DEBUGP("conntrack_ftp: partial `%.*s'\n", + (int)datalen, data); + return NF_DROP; + + case 0: /* no match */ + DEBUGP("ip_conntrack_ftp_help: no match\n"); + return NF_ACCEPT; + } + + DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", + (int)matchlen, data + matchoff, + matchlen, ntohl(tcph->seq) + matchoff); + + /* Update the ftp info */ + LOCK_BH(&ip_ftp_lock); + info->is_ftp = 1; + info->seq = ntohl(tcph->seq) + matchoff; + info->len = matchlen; + info->ftptype = dir; + info->port = array[4] << 8 | array[5]; + + t = ((struct ip_conntrack_tuple) + { { ct->tuplehash[!dir].tuple.src.ip, + { 0 }, 0 }, + { htonl((array[0] << 24) | (array[1] << 16) + | (array[2] << 8) | array[3]), + { htons(array[4] << 8 | array[5]) }, + IPPROTO_TCP }}); + ip_conntrack_expect_related(ct, &t); + UNLOCK_BH(&ip_ftp_lock); + + return NF_ACCEPT; +} + +/* Returns TRUE if it wants to help this connection (tuple is the + tuple of REPLY packets from server). */ +static int ftp_will_help(const struct ip_conntrack_tuple *rtuple) +{ + return (rtuple->dst.protonum == IPPROTO_TCP + && rtuple->src.u.tcp.port == __constant_htons(21)); +} + +static struct ip_conntrack_helper ftp = { { NULL, NULL }, + ftp_will_help, + help }; + +static int __init init(void) +{ + return ip_conntrack_helper_register(&ftp); +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&ftp); +} + +struct module *ip_conntrack_ftp = THIS_MODULE; +EXPORT_SYMBOL(ip_conntrack_ftp); +EXPORT_SYMBOL(ip_ftp_lock); + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c new file mode 100644 index 000000000000..77a491e349c6 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include + +#define GENERIC_TIMEOUT (3600*HZ) + +static int generic_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + 
tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +static int generic_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int generic_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return 0; +} + +/* Print out the private part of the conntrack. */ +static unsigned int generic_print_conntrack(char *buffer, + const struct ip_conntrack *state) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int established(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info conntrackinfo) +{ + ip_ct_refresh(conntrack, GENERIC_TIMEOUT); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int new(struct ip_conntrack *conntrack, struct iphdr *iph, size_t len) +{ + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_generic_protocol += { { NULL, NULL }, 0, "unknown", + generic_pkt_to_tuple, generic_invert_tuple, generic_print_tuple, + generic_print_conntrack, established, new, NULL }; + diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c new file mode 100644 index 000000000000..1d1256be530a --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include + +#define ICMP_TIMEOUT (30*HZ) + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +static int icmp_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + const struct icmphdr *hdr = datah; + + tuple->dst.u.icmp.type = hdr->type; + tuple->src.u.icmp.id = hdr->un.echo.id; + tuple->dst.u.icmp.code = hdr->code; + + return 1; +} + +static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + /* Add 1; spaces filled with 0. */ + static u_int8_t invmap[] + = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, + [ICMP_ECHOREPLY] = ICMP_ECHO + 1, + [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, + [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, + [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, + [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, + [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, + [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; + + if (orig->dst.u.icmp.type >= sizeof(invmap) + || !invmap[orig->dst.u.icmp.type]) + return 0; + + tuple->src.u.icmp.id = orig->src.u.icmp.id; + tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; + tuple->dst.u.icmp.code = orig->dst.u.icmp.code; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int icmp_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return sprintf(buffer, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int icmp_print_conntrack(char *buffer, + const struct ip_conntrack *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. 
*/ +static int icmp_packet(struct ip_conntrack *ct, + struct iphdr *iph, size_t len, + enum ip_conntrack_info ctinfo) +{ + /* FIXME: Should keep count of orig - reply packets: if == 0, + destroy --RR */ + /* Delete connection immediately on reply: won't actually + vanish as we still have skb */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + } else + ip_ct_refresh(ct, ICMP_TIMEOUT); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int icmp_new(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len) +{ + static u_int8_t valid_new[] + = { [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 }; + + if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { + /* Can't create a new ICMP `conn' with this. */ + DEBUGP("icmp: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmp.type); + DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + return 0; + } + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_protocol_icmp += { { NULL, NULL }, IPPROTO_ICMP, "icmp", + icmp_pkt_to_tuple, icmp_invert_tuple, icmp_print_tuple, + icmp_print_conntrack, icmp_packet, icmp_new, NULL }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c new file mode 100644 index 000000000000..3dd4482529f2 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -0,0 +1,227 @@ +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* Protects conntrack->proto.tcp_state */ +static DECLARE_RWLOCK(tcp_lock); + +/* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. 
--RR */ + +/* Actually, I believe that neither ipmasq (where this code is stolen + from) nor ipfilter do it exactly right. A new conntrack machine taking + into account packet loss (which creates uncertainty as to exactly + the conntrack of the connection) is required. RSN. --RR */ +enum tcp_conntrack { + TCP_CONNTRACK_NONE, + TCP_CONNTRACK_ESTABLISHED, + TCP_CONNTRACK_SYN_SENT, + TCP_CONNTRACK_SYN_RECV, + TCP_CONNTRACK_FIN_WAIT, + TCP_CONNTRACK_TIME_WAIT, + TCP_CONNTRACK_CLOSE, + TCP_CONNTRACK_CLOSE_WAIT, + TCP_CONNTRACK_LAST_ACK, + TCP_CONNTRACK_LISTEN, + TCP_CONNTRACK_MAX +}; + +static const char *tcp_conntrack_names[] = { + "NONE", + "ESTABLISHED", + "SYN_SENT", + "SYN_RECV", + "FIN_WAIT", + "TIME_WAIT", + "CLOSE", + "CLOSE_WAIT", + "LAST_ACK", + "LISTEN" +}; + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + + +static unsigned long tcp_timeouts[] += { 30 MINS, /* TCP_CONNTRACK_NONE, */ + 5 DAYS, /* TCP_CONNTRACK_ESTABLISHED, */ + 2 MINS, /* TCP_CONNTRACK_SYN_SENT, */ + 60 SECS, /* TCP_CONNTRACK_SYN_RECV, */ + 2 MINS, /* TCP_CONNTRACK_FIN_WAIT, */ + 2 MINS, /* TCP_CONNTRACK_TIME_WAIT, */ + 10 SECS, /* TCP_CONNTRACK_CLOSE, */ + 60 SECS, /* TCP_CONNTRACK_CLOSE_WAIT, */ + 30 SECS, /* TCP_CONNTRACK_LAST_ACK, */ + 2 MINS, /* TCP_CONNTRACK_LISTEN, */ +}; + +#define sNO TCP_CONNTRACK_NONE +#define sES TCP_CONNTRACK_ESTABLISHED +#define sSS TCP_CONNTRACK_SYN_SENT +#define sSR TCP_CONNTRACK_SYN_RECV +#define sFW TCP_CONNTRACK_FIN_WAIT +#define sTW TCP_CONNTRACK_TIME_WAIT +#define sCL TCP_CONNTRACK_CLOSE +#define sCW TCP_CONNTRACK_CLOSE_WAIT +#define sLA TCP_CONNTRACK_LAST_ACK +#define sLI TCP_CONNTRACK_LISTEN +#define sIV TCP_CONNTRACK_MAX + +static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ +/*syn*/ {sSS, sES, sSS, sES, sSS, sSS, sSS, sSS, sSS, sLI }, +/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI }, +/*ack*/ {sES, 
sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sES }, +/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL }, +/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + }, + { +/* REPLY */ +/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */ +/*syn*/ {sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR }, +/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI }, +/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI }, +/*rst*/ {sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI }, +/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + } +}; + +static int tcp_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + const struct tcphdr *hdr = datah; + + tuple->src.u.tcp.port = hdr->source; + tuple->dst.u.tcp.port = hdr->dest; + + return 1; +} + +static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->src.u.tcp.port = orig->dst.u.tcp.port; + tuple->dst.u.tcp.port = orig->src.u.tcp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static unsigned int tcp_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return sprintf(buffer, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int tcp_print_conntrack(char *buffer, + const struct ip_conntrack *conntrack) +{ + enum tcp_conntrack state; + + READ_LOCK(&tcp_lock); + state = conntrack->proto.tcp_state; + READ_UNLOCK(&tcp_lock); + + return sprintf(buffer, "%s ", tcp_conntrack_names[state]); +} + +static unsigned int get_conntrack_index(const struct tcphdr *tcph) +{ + if (tcph->rst) return 3; + else if (tcph->syn) return 0; + else if (tcph->fin) return 1; + else if (tcph->ack) return 2; + else return 4; +} + +/* Returns verdict for packet, or -1 for invalid. 
*/ +static int tcp_packet(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info ctinfo) +{ + enum tcp_conntrack newconntrack; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl); + + /* We're guaranteed to have the base header, but maybe not the + options. */ + if (len < (iph->ihl + tcph->doff) * 4) { + DEBUGP("ip_conntrack_tcp: Truncated packet.\n"); + return -1; + } + + WRITE_LOCK(&tcp_lock); + newconntrack + = tcp_conntracks + [CTINFO2DIR(ctinfo)] + [get_conntrack_index(tcph)][conntrack->proto.tcp_state]; + + /* Invalid */ + if (newconntrack == TCP_CONNTRACK_MAX) { + DEBUGP("ip_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n", + CTINFO2DIR(ctinfo), get_conntrack_index(tcph), + conntrack->proto.tcp_state); + WRITE_UNLOCK(&tcp_lock); + return -1; + } + + conntrack->proto.tcp_state = newconntrack; + WRITE_UNLOCK(&tcp_lock); + + /* Refresh: need write lock to write to conntrack. */ + ip_ct_refresh(conntrack, tcp_timeouts[conntrack->proto.tcp_state]); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. 
*/ +static int tcp_new(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len) +{ + enum tcp_conntrack newconntrack; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl); + + /* Don't need lock here: this conntrack not in circulation yet */ + newconntrack + = tcp_conntracks[0][get_conntrack_index(tcph)] + [TCP_CONNTRACK_NONE]; + + /* Invalid: delete conntrack */ + if (newconntrack == TCP_CONNTRACK_MAX) { + DEBUGP("ip_conntrack_tcp: invalid new deleting.\n"); + return 0; + } else { + conntrack->proto.tcp_state = newconntrack; + ip_ct_refresh(conntrack, tcp_timeouts[conntrack->proto.tcp_state]); + } + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_protocol_tcp += { { NULL, NULL }, IPPROTO_TCP, "tcp", + tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack, + tcp_packet, tcp_new, NULL }; + +int __init ip_conntrack_protocol_tcp_init(void) +{ + return 0; +} diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c new file mode 100644 index 000000000000..688ae10fb845 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include +#include + +#define UDP_TIMEOUT (60*HZ) + +static int udp_pkt_to_tuple(const void *datah, size_t datalen, + struct ip_conntrack_tuple *tuple) +{ + const struct udphdr *hdr = datah; + + tuple->src.u.udp.port = hdr->source; + tuple->dst.u.udp.port = hdr->dest; + + return 1; +} + +static int udp_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->src.u.udp.port = orig->dst.u.udp.port; + tuple->dst.u.udp.port = orig->src.u.udp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. 
*/ +static unsigned int udp_print_tuple(char *buffer, + const struct ip_conntrack_tuple *tuple) +{ + return sprintf(buffer, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); +} + +/* Print out the private part of the conntrack. */ +static unsigned int udp_print_conntrack(char *buffer, + const struct ip_conntrack *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, and may modify conntracktype */ +static int udp_packet(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len, + enum ip_conntrack_info conntrackinfo) +{ + /* Refresh. */ + ip_ct_refresh(conntrack, UDP_TIMEOUT); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int udp_new(struct ip_conntrack *conntrack, + struct iphdr *iph, size_t len) +{ + return 1; +} + +struct ip_conntrack_protocol ip_conntrack_protocol_udp += { { NULL, NULL }, IPPROTO_UDP, "udp", + udp_pkt_to_tuple, udp_invert_tuple, udp_print_tuple, udp_print_conntrack, + udp_packet, udp_new, NULL }; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c new file mode 100644 index 000000000000..594ed0ac75e8 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -0,0 +1,298 @@ +/* This file contains all the functions required for the standalone + ip_conntrack module. + + These are not required by the compatibility layer. +*/ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ + +#ifdef MODULE +#define EXPORT_SYMTAB +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +struct module *ip_conntrack_module = THIS_MODULE; + +static unsigned int +print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *proto) +{ + int len; + + len = sprintf(buffer, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", + NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip)); + + len += proto->print_tuple(buffer + len, tuple); + + return len; +} + +/* FIXME: Don't print source proto part. --RR */ +static unsigned int +print_expect(char *buffer, const struct ip_conntrack_expect *expect) +{ + unsigned int len; + + len = sprintf(buffer, "EXPECTING: proto=%u ", + expect->tuple.dst.protonum); + len += print_tuple(buffer + len, &expect->tuple, + __find_proto(expect->tuple.dst.protonum)); + len += sprintf(buffer + len, "\n"); + return len; +} + +static unsigned int +print_conntrack(char *buffer, const struct ip_conntrack *conntrack) +{ + unsigned int len; + struct ip_conntrack_protocol *proto + = __find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + + len = sprintf(buffer, "%-8s %u %lu ", + proto->name, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum, + timer_pending(&conntrack->timeout) + ? (conntrack->timeout.expires - jiffies)/HZ : 0); + + len += proto->print_conntrack(buffer + len, conntrack); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + proto); + if (!(conntrack->status & IPS_SEEN_REPLY)) + len += sprintf(buffer + len, "[UNREPLIED] "); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, + proto); + len += sprintf(buffer + len, "\n"); + + return len; +} + +/* Returns true when finished. 
*/ +static int +conntrack_iterate(const struct ip_conntrack_tuple_hash *hash, + char *buffer, off_t offset, off_t *upto, + unsigned int *len, unsigned int maxlen) +{ + unsigned int newlen; + IP_NF_ASSERT(hash->ctrack); + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + + /* Only count originals */ + if (DIRECTION(hash)) + return 0; + + if ((*upto)++ < offset) + return 0; + + newlen = print_conntrack(buffer + *len, hash->ctrack); + if (*len + newlen > maxlen) + return 1; + else *len += newlen; + + return 0; +} + +static int +list_conntracks(char *buffer, char **start, off_t offset, int length) +{ + unsigned int i; + unsigned int len = 0; + off_t upto = 0; + struct list_head *e; + + READ_LOCK(&ip_conntrack_lock); + /* Traverse hash; print originals then reply. */ + for (i = 0; i < ip_conntrack_htable_size; i++) { + if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate, + struct ip_conntrack_tuple_hash *, + buffer, offset, &upto, &len, length)) + goto finished; + } + + /* Now iterate through expecteds. */ + for (e = expect_list.next; e != &expect_list; e = e->next) { + unsigned int last_len; + struct ip_conntrack_expect *expect + = (struct ip_conntrack_expect *)e; + if (upto++ < offset) continue; + + last_len = len; + len += print_expect(buffer + len, expect); + if (len > length) { + len = last_len; + goto finished; + } + } + + finished: + READ_UNLOCK(&ip_conntrack_lock); + + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start = (char *)((unsigned int)upto - offset); + return len; +} + +static unsigned int ip_refrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct rtable *rt = (struct rtable *)(*pskb)->dst; + + /* Local packets are never produced too large for their + interface. We degfragment them at LOCAL_OUT, however, + so we have to refragment them here. 
*/ + if ((*pskb)->len > rt->u.dst.pmtu) { + DEBUGP("ip_conntrack: refragm %p (size %u) to %u (okfn %p)\n", + *pskb, (*pskb)->len, rt->u.dst.pmtu, okfn); + /* No hook can be after us, so this should be OK. */ + ip_fragment(*pskb, okfn); + return NF_STOLEN; + } + return NF_ACCEPT; +} + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. */ +static struct nf_hook_ops ip_conntrack_in_ops += { { NULL, NULL }, ip_conntrack_in, PF_INET, NF_IP_PRE_ROUTING, + NF_IP_PRI_CONNTRACK }; +static struct nf_hook_ops ip_conntrack_local_out_ops += { { NULL, NULL }, ip_conntrack_in, PF_INET, NF_IP_LOCAL_OUT, + NF_IP_PRI_CONNTRACK }; +/* Refragmenter; last chance. */ +static struct nf_hook_ops ip_conntrack_out_ops += { { NULL, NULL }, ip_refrag, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_LAST }; + +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = ip_conntrack_init(); + if (ret < 0) + goto cleanup_nothing; + + proc_net_create("ip_conntrack",0,list_conntracks); + ret = nf_register_hook(&ip_conntrack_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register in hook.\n"); + goto cleanup_init; + } + ret = nf_register_hook(&ip_conntrack_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local out hook.\n"); + goto cleanup_inops; + } + ret = nf_register_hook(&ip_conntrack_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } + + return ret; + + cleanup: + nf_unregister_hook(&ip_conntrack_out_ops); + cleanup_inandlocalops: + nf_unregister_hook(&ip_conntrack_local_out_ops); + cleanup_inops: + nf_unregister_hook(&ip_conntrack_in_ops); + cleanup_init: + proc_net_remove("ip_conntrack"); + ip_conntrack_cleanup(); + cleanup_nothing: + return ret; +} + +/* FIXME: Allow NULL functions and sub in pointers to generic for + them. 
--RR */ +int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) +{ + int ret = 0; + struct list_head *i; + + WRITE_LOCK(&ip_conntrack_lock); + for (i = protocol_list.next; i != &protocol_list; i = i->next) { + if (((struct ip_conntrack_protocol *)i)->proto + == proto->proto) { + ret = -EBUSY; + goto out; + } + } + + list_prepend(&protocol_list, proto); + MOD_INC_USE_COUNT; + + out: + WRITE_UNLOCK(&ip_conntrack_lock); + return ret; +} + +/* FIXME: Implement this --RR */ +#if 0 +void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) +{ +} +#endif + +static int __init init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_conntrack_protocol_register); +EXPORT_SYMBOL(invert_tuplepr); +EXPORT_SYMBOL(ip_conntrack_alter_reply); +EXPORT_SYMBOL(ip_conntrack_destroyed); +EXPORT_SYMBOL(ip_conntrack_get); +EXPORT_SYMBOL(ip_conntrack_module); +EXPORT_SYMBOL(ip_conntrack_helper_register); +EXPORT_SYMBOL(ip_conntrack_helper_unregister); +EXPORT_SYMBOL(ip_conntrack_lock); +EXPORT_SYMBOL(find_proto); +EXPORT_SYMBOL(get_tuple); +EXPORT_SYMBOL(ip_ct_selective_cleanup); +EXPORT_SYMBOL(ip_ct_refresh); +EXPORT_SYMBOL(ip_conntrack_expect_related); +EXPORT_SYMBOL(ip_conntrack_tuple_taken); +EXPORT_SYMBOL(ip_ct_gather_frags); diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c new file mode 100644 index 000000000000..9edaef896fc5 --- /dev/null +++ b/net/ipv4/netfilter/ip_fw_compat.c @@ -0,0 +1,239 @@ +/* Compatibility framework for ipchains and ipfwadm support; designed + to look as much like the 2.2 infrastructure as possible. 
*/ +struct notifier_block; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_NO_SYMBOLS; + +static struct firewall_ops *fwops; + +/* From ip_fw_compat_redir.c */ +extern unsigned int +do_redirect(struct sk_buff *skb, + const struct net_device *dev, + u_int16_t redirpt); + +extern void +check_for_redirect(struct sk_buff *skb); + +extern void +check_for_unredirect(struct sk_buff *skb); + +/* From ip_fw_compat_masq.c */ +extern unsigned int +do_masquerade(struct sk_buff **pskb, const struct net_device *dev); + +extern unsigned int +check_for_demasq(struct sk_buff **pskb); + +extern int __init masq_init(void); +extern void masq_cleanup(void); + +/* They call these; we do what they want. */ +int register_firewall(int pf, struct firewall_ops *fw) +{ + if (pf != PF_INET) { + printk("Attempt to register non-IP firewall module.\n"); + return -EINVAL; + } + if (fwops) { + printk("Attempt to register multiple firewall modules.\n"); + return -EBUSY; + } + + fwops = fw; + return 0; +} + +int unregister_firewall(int pf, struct firewall_ops *fw) +{ + fwops = NULL; + return 0; +} + +static unsigned int +fw_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + int ret = FW_BLOCK; + u_int16_t redirpt; + + (*pskb)->nfcache |= NFC_UNKNOWN; + (*pskb)->ip_summed = CHECKSUM_NONE; + + switch (hooknum) { + case NF_IP_PRE_ROUTING: + if (fwops->fw_acct_in) + fwops->fw_acct_in(fwops, PF_INET, + (struct net_device *)in, + (*pskb)->nh.raw, &redirpt, pskb); + + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + + if (!*pskb) + return NF_STOLEN; + } + + ret = fwops->fw_input(fwops, PF_INET, (struct net_device *)in, + (*pskb)->nh.raw, &redirpt, pskb); + break; + + case NF_IP_FORWARD: + /* Connection will only be set if it was + demasqueraded: if so, skip forward chain. 
*/ + if ((*pskb)->nfct) + ret = FW_ACCEPT; + else ret = fwops->fw_forward(fwops, PF_INET, + (struct net_device *)out, + (*pskb)->nh.raw, &redirpt, pskb); + break; + + case NF_IP_POST_ROUTING: + ret = fwops->fw_output(fwops, PF_INET, + (struct net_device *)out, + (*pskb)->nh.raw, &redirpt, pskb); + if (fwops->fw_acct_out && (ret == FW_ACCEPT || ret == FW_SKIP)) + fwops->fw_acct_out(fwops, PF_INET, + (struct net_device *)in, + (*pskb)->nh.raw, &redirpt, pskb); + break; + } + + switch (ret) { + case FW_REJECT: { + /* Alexey says: + * + * Generally, routing is THE FIRST thing to make, when + * packet enters IP stack. Before packet is routed you + * cannot call any service routines from IP stack. */ + struct iphdr *iph = (*pskb)->nh.iph; + + if ((*pskb)->dst != NULL + || ip_route_input(*pskb, iph->daddr, iph->saddr, iph->tos, + (struct net_device *)in) == 0) + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, + 0); + return NF_DROP; + } + + case FW_ACCEPT: + case FW_SKIP: + if (hooknum == NF_IP_PRE_ROUTING) { + check_for_demasq(pskb); + check_for_redirect(*pskb); + } else if (hooknum == NF_IP_POST_ROUTING) + check_for_unredirect(*pskb); + + return NF_ACCEPT; + + case FW_MASQUERADE: + if (hooknum == NF_IP_FORWARD) + return do_masquerade(pskb, out); + else return NF_ACCEPT; + + case FW_REDIRECT: + if (hooknum == NF_IP_PRE_ROUTING) + return do_redirect(*pskb, in, redirpt); + else return NF_ACCEPT; + + default: + /* FW_BLOCK */ + return NF_DROP; + } +} + +extern int ip_fw_ctl(int optval, void *user, unsigned int len); + +static int sock_fn(struct sock *sk, int optval, void *user, unsigned int len) +{ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + return -ip_fw_ctl(optval, user, len); +} + +static struct nf_hook_ops preroute_ops += { { NULL, NULL }, fw_in, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_FILTER }; + +static struct nf_hook_ops postroute_ops += { { NULL, NULL }, fw_in, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_FILTER }; + +static struct nf_hook_ops forward_ops += 
{ { NULL, NULL }, fw_in, PF_INET, NF_IP_FORWARD, NF_IP_PRI_FILTER }; + +static struct nf_sockopt_ops sock_ops += { { NULL, NULL }, PF_INET, 64, 64 + 1024 + 1, &sock_fn, 0, 0, NULL, + 0, NULL }; + +extern int ipfw_init_or_cleanup(int init); + +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = nf_register_sockopt(&sock_ops); + + if (ret < 0) + goto cleanup_nothing; + + ret = ipfw_init_or_cleanup(1); + if (ret < 0) + goto cleanup_sockopt; + + ret = masq_init(); + if (ret < 0) + goto cleanup_ipfw; + + nf_register_hook(&preroute_ops); + nf_register_hook(&postroute_ops); + nf_register_hook(&forward_ops); + + return ret; + + cleanup: + nf_unregister_hook(&preroute_ops); + nf_unregister_hook(&postroute_ops); + nf_unregister_hook(&forward_ops); + + masq_cleanup(); + + cleanup_ipfw: + ipfw_init_or_cleanup(0); + + cleanup_sockopt: + nf_unregister_sockopt(&sock_ops); + + cleanup_nothing: + return ret; +} + +static int __init init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c new file mode 100644 index 000000000000..e0074c1e2cff --- /dev/null +++ b/net/ipv4/netfilter/ip_fw_compat_masq.c @@ -0,0 +1,288 @@ +/* Masquerading compatibility layer. + + Note that there are no restrictions on other programs binding to + ports 61000:65095 (in 2.0 and 2.2 they get EADDRINUSE). Just DONT + DO IT. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +unsigned int +do_masquerade(struct sk_buff **pskb, const struct net_device *dev) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct ip_nat_info *info; + enum ip_conntrack_info ctinfo; + struct ip_conntrack *ct; + unsigned int ret; + + /* Sorry, only ICMP, TCP and UDP. */ + if (iph->protocol != IPPROTO_ICMP + && iph->protocol != IPPROTO_TCP + && iph->protocol != IPPROTO_UDP) + return NF_DROP; + + /* Feed it to connection tracking; in fact we're in NF_IP_FORWARD, + but connection tracking doesn't expect that */ + ret = ip_conntrack_in(NF_IP_POST_ROUTING, pskb, dev, NULL, NULL); + if (ret != NF_ACCEPT) { + DEBUGP("ip_conntrack_in returned %u.\n", ret); + return ret; + } + + ct = ip_conntrack_get(*pskb, &ctinfo); + + if (!ct) { + DEBUGP("ip_conntrack_in set to invalid conntrack.\n"); + return NF_DROP; + } + + info = &ct->nat.info; + + WRITE_LOCK(&ip_nat_lock); + /* Setup the masquerade, if not already */ + if (!info->initialized) { + u_int32_t newsrc; + struct rtable *rt; + struct ip_nat_multi_range range; + + /* Pass 0 instead of saddr, since it's going to be changed + anyway. 
*/ + if (ip_route_output(&rt, iph->daddr, 0, 0, 0) != 0) { + DEBUGP("ipnat_rule_masquerade: Can't reroute.\n"); + return NF_DROP; + } + newsrc = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, + RT_SCOPE_UNIVERSE); + ip_rt_put(rt); + range = ((struct ip_nat_multi_range) + { 1, + {{IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED, + newsrc, newsrc, + { htons(61000) }, { htons(65095) } } } }); + + ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + place_in_hashes(ct, info); + info->initialized = 1; + } else + DEBUGP("Masquerading already done on this conn.\n"); + WRITE_UNLOCK(&ip_nat_lock); + + return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb); +} + +unsigned int +check_for_demasq(struct sk_buff **pskb) +{ + struct ip_conntrack_tuple tuple; + struct iphdr *iph = (*pskb)->nh.iph; + struct ip_conntrack_protocol *protocol; + struct ip_conntrack_tuple_hash *h; + enum ip_conntrack_info ctinfo; + int ret; + + protocol = find_proto(iph->protocol); + + /* We don't feed packets to conntrack system unless we know + they're part of an connection already established by an + explicit masq command. */ + switch (iph->protocol) { + case IPPROTO_ICMP: + /* ICMP errors. */ + if (icmp_error_track(*pskb)) { + /* If it is valid, tranlsate it */ + if ((*pskb)->nfct) { + struct ip_conntrack *ct + = (struct ip_conntrack *) + (*pskb)->nfct->master; + enum ip_conntrack_dir dir; + + if ((*pskb)->nfct-ct->infos >= IP_CT_IS_REPLY) + dir = IP_CT_DIR_REPLY; + else + dir = IP_CT_DIR_ORIGINAL; + + icmp_reply_translation(*pskb, + ct, + NF_IP_PRE_ROUTING, + dir); + } + return NF_ACCEPT; + } + /* Fall thru... */ + case IPPROTO_TCP: + case IPPROTO_UDP: + if (!get_tuple(iph, (*pskb)->len, &tuple, protocol)) { + printk("ip_fw_compat_masq: Couldn't get tuple\n"); + return NF_ACCEPT; + } + break; + + default: + /* Not ours... */ + return NF_ACCEPT; + } + h = ip_conntrack_find_get(&tuple, NULL); + + /* MUST be found, and MUST be reply. 
*/ + if (h && DIRECTION(h) == 1) { + ret = ip_conntrack_in(NF_IP_PRE_ROUTING, pskb, + NULL, NULL, NULL); + + /* Put back the reference gained from find_get */ + nf_conntrack_put(&h->ctrack->infos[0]); + if (ret == NF_ACCEPT) { + struct ip_conntrack *ct; + ct = ip_conntrack_get(*pskb, &ctinfo); + + if (ct) { + struct ip_nat_info *info = &ct->nat.info; + + do_bindings(ct, ctinfo, info, + NF_IP_PRE_ROUTING, + pskb); + } else + printk("ip_fw_compat_masq: conntrack" + " didn't like\n"); + } + } else { + if (h) + /* Put back the reference gained from find_get */ + nf_conntrack_put(&h->ctrack->infos[0]); + ret = NF_ACCEPT; + } + + return ret; +} + +int ip_fw_masq_timeouts(void *user, int len) +{ + printk("Sorry: masquerading timeouts set 5DAYS/2MINS/60SECS\n"); + return 0; +} + +static const char *masq_proto_name(u_int16_t protonum) +{ + switch (protonum) { + case IPPROTO_TCP: return "TCP"; + case IPPROTO_UDP: return "UDP"; + case IPPROTO_ICMP: return "ICMP"; + default: return "MORE-CAFFIENE-FOR-RUSTY"; + } +} + +static unsigned int +print_masq(char *buffer, const struct ip_conntrack *conntrack) +{ + char temp[129]; + + /* This is for backwards compatibility, but ick!. + We should never export jiffies to userspace. + */ + sprintf(temp,"%s %08X:%04X %08X:%04X %04X %08X %6d %6d %7lu", + masq_proto_name(conntrack->tuplehash[0].tuple.dst.protonum), + ntohl(conntrack->tuplehash[0].tuple.src.ip), + ntohs(conntrack->tuplehash[0].tuple.src.u.all), + ntohl(conntrack->tuplehash[0].tuple.dst.ip), + ntohs(conntrack->tuplehash[0].tuple.dst.u.all), + ntohs(conntrack->tuplehash[1].tuple.dst.u.all), + /* Sorry, no init_seq, delta or previous_delta (yet). */ + 0, 0, 0, + conntrack->timeout.expires - jiffies); + + return sprintf(buffer, "%-127s\n", temp); +} + +/* Returns true when finished. 
*/ +static int +masq_iterate(const struct ip_conntrack_tuple_hash *hash, + char *buffer, off_t offset, off_t *upto, + unsigned int *len, unsigned int maxlen) +{ + unsigned int newlen; + + IP_NF_ASSERT(hash->ctrack); + + /* Only count originals */ + if (DIRECTION(hash)) + return 0; + + if ((*upto)++ < offset) + return 0; + + newlen = print_masq(buffer + *len, hash->ctrack); + if (*len + newlen > maxlen) + return 1; + else *len += newlen; + + return 0; +} + +/* Everything in the hash is masqueraded. */ +static int +masq_procinfo(char *buffer, char **start, off_t offset, int length) +{ + unsigned int i; + int len = 0; + off_t upto = 0; + + READ_LOCK(&ip_conntrack_lock); + /* Traverse hash; print originals then reply. */ + for (i = 0; i < ip_conntrack_htable_size; i++) { + if (LIST_FIND(&ip_conntrack_hash[i], masq_iterate, + struct ip_conntrack_tuple_hash *, + buffer, offset, &upto, &len, length)) + break; + } + READ_UNLOCK(&ip_conntrack_lock); + + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start = (char *)((unsigned int)upto - offset); + return len; +} + +int __init masq_init(void) +{ + int ret; + + ret = ip_conntrack_init(); + if (ret == 0) { + ret = ip_nat_init(); + if (ret == 0) + proc_net_create("ip_masquerade", 0, masq_procinfo); + else + ip_conntrack_cleanup(); + } + + return ret; +} + +void masq_cleanup(void) +{ + ip_nat_cleanup(); + ip_conntrack_cleanup(); + proc_net_remove("ip_masquerade"); +} diff --git a/net/ipv4/netfilter/ip_fw_compat_redir.c b/net/ipv4/netfilter/ip_fw_compat_redir.c new file mode 100644 index 000000000000..2624c26e53bc --- /dev/null +++ b/net/ipv4/netfilter/ip_fw_compat_redir.c @@ -0,0 +1,283 @@ +/* This is a file to handle the "simple" NAT cases (redirect and + masquerade) required for the compatibility layer. + + `bind to foreign address' and `getpeername' hacks are not + supported. + + FIXME: Timing is overly simplistic. If anyone complains, make it + use conntrack. 
+*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static DECLARE_LOCK(redir_lock); +#define ASSERT_READ_LOCK(x) MUST_BE_LOCKED(&redir_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_LOCKED(&redir_lock) + +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#ifdef CONFIG_NETFILTER_DEBUG +#define IP_NF_ASSERT(x) \ +do { \ + if (!(x)) \ + /* Wooah! I'm tripping my conntrack in a frenzy of \ + netplay... */ \ + printk("ASSERT: %s:%i(%s)\n", \ + __FILE__, __LINE__, __FUNCTION__); \ +} while(0); +#else +#define IP_NF_ASSERT(x) +#endif + +static u_int16_t +cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) +{ + u_int32_t diffs[] = { oldvalinv, newval }; + return csum_fold(csum_partial((char *)diffs, sizeof(diffs), + oldcheck^0xFFFF)); +} + +struct redir_core { + u_int32_t orig_srcip, orig_dstip; + u_int16_t orig_sport, orig_dport; + + u_int32_t new_dstip; + u_int16_t new_dport; +}; + +struct redir +{ + struct list_head list; + struct redir_core core; + struct timer_list destroyme; +}; + +static LIST_HEAD(redirs); + +static int +redir_cmp(const struct redir *i, + u_int32_t orig_srcip, u_int32_t orig_dstip, + u_int16_t orig_sport, u_int16_t orig_dport) +{ + return (i->core.orig_srcip == orig_srcip + && i->core.orig_dstip == orig_dstip + && i->core.orig_sport == orig_sport + && i->core.orig_dport == orig_dport); +} + +/* Search for an existing redirection of the TCP packet. 
*/ +static struct redir * +find_redir(u_int32_t orig_srcip, u_int32_t orig_dstip, + u_int16_t orig_sport, u_int16_t orig_dport) +{ + return LIST_FIND(&redirs, redir_cmp, struct redir *, + orig_srcip, orig_dstip, orig_sport, orig_dport); +} + +static void do_tcp_redir(struct sk_buff *skb, struct redir *redir) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + + iph->ihl); + + tcph->check = cheat_check(~redir->core.orig_dstip, + redir->core.new_dstip, + cheat_check(redir->core.orig_dport ^ 0xFFFF, + redir->core.new_dport, + tcph->check)); + iph->check = cheat_check(~redir->core.orig_dstip, + redir->core.new_dstip, iph->check); + tcph->dest = redir->core.new_dport; + iph->daddr = redir->core.new_dstip; + + skb->nfcache |= NFC_ALTERED; +} + +static int +unredir_cmp(const struct redir *i, + u_int32_t new_dstip, u_int32_t orig_srcip, + u_int16_t new_dport, u_int16_t orig_sport) +{ + return (i->core.orig_srcip == orig_srcip + && i->core.new_dstip == new_dstip + && i->core.orig_sport == orig_sport + && i->core.new_dport == new_dport); +} + +/* Match reply packet against redir */ +static struct redir * +find_unredir(u_int32_t new_dstip, u_int32_t orig_srcip, + u_int16_t new_dport, u_int16_t orig_sport) +{ + return LIST_FIND(&redirs, unredir_cmp, struct redir *, + new_dstip, orig_srcip, new_dport, orig_sport); +} + +/* `unredir' a reply packet. */ +static void do_tcp_unredir(struct sk_buff *skb, struct redir *redir) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + + iph->ihl); + + tcph->check = cheat_check(~redir->core.new_dstip, + redir->core.orig_dstip, + cheat_check(redir->core.new_dport ^ 0xFFFF, + redir->core.orig_dport, + tcph->check)); + iph->check = cheat_check(~redir->core.new_dstip, + redir->core.orig_dstip, + iph->check); + tcph->source = redir->core.orig_dport; + iph->saddr = redir->core.orig_dstip; + + skb->nfcache |= NFC_ALTERED; +} + +/* REDIRECT a packet. 
*/ +unsigned int +do_redirect(struct sk_buff *skb, + const struct net_device *dev, + u_int16_t redirpt) +{ + struct iphdr *iph = skb->nh.iph; + u_int32_t newdst; + + /* Figure out address: not loopback. */ + if (!dev) + return NF_DROP; + + /* Grab first address on interface. */ + newdst = ((struct in_device *)dev->ip_ptr)->ifa_list->ifa_local; + + switch (iph->protocol) { + case IPPROTO_UDP: { + /* Simple mangle. */ + struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + + iph->ihl); + + udph->check = cheat_check(~iph->daddr, newdst, + cheat_check(udph->dest ^ 0xFFFF, + redirpt, + udph->check)); + iph->check = cheat_check(~iph->daddr, newdst, iph->check); + udph->dest = redirpt; + iph->daddr = newdst; + + skb->nfcache |= NFC_ALTERED; + return NF_ACCEPT; + } + case IPPROTO_TCP: { + /* Mangle, maybe record. */ + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + + iph->ihl); + struct redir *redir; + int ret; + + DEBUGP("Doing tcp redirect. %08X:%u %08X:%u -> %08X:%u\n", + iph->saddr, tcph->source, iph->daddr, tcph->dest, + newdst, redirpt); + LOCK_BH(&redir_lock); + redir = find_redir(iph->saddr, iph->daddr, + tcph->source, tcph->dest); + + if (!redir) { + redir = kmalloc(sizeof(struct redir), GFP_ATOMIC); + if (!redir) { + ret = NF_DROP; + goto out; + } + list_prepend(&redirs, redir); + init_timer(&redir->destroyme); + } + /* In case mangling has changed, rewrite this part. */ + redir->core = ((struct redir_core) + { iph->saddr, iph->daddr, + tcph->source, tcph->dest, + newdst, redirpt }); + do_tcp_redir(skb, redir); + ret = NF_ACCEPT; + + out: + UNLOCK_BH(&redir_lock); + return ret; + } + + default: /* give up if not TCP or UDP. */ + return NF_DROP; + } +} + +static void destroyme(unsigned long me) +{ + LOCK_BH(&redir_lock); + LIST_DELETE(&redirs, (struct redir *)me); + UNLOCK_BH(&redir_lock); +} + +/* Incoming packet: is it a reply to a masqueraded connection, or + part of an already-redirected TCP connection? 
*/ +void +check_for_redirect(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + + iph->ihl); + struct redir *redir; + + if (iph->protocol != IPPROTO_TCP) + return; + + LOCK_BH(&redir_lock); + redir = find_redir(iph->saddr, iph->daddr, tcph->source, tcph->dest); + if (redir) { + DEBUGP("Doing tcp redirect again.\n"); + do_tcp_redir(skb, redir); + if (tcph->rst || tcph->fin) { + redir->destroyme.function = destroyme; + redir->destroyme.data = (unsigned long)redir; + mod_timer(&redir->destroyme, 75*HZ); + } + } + UNLOCK_BH(&redir_lock); +} + +void +check_for_unredirect(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + + iph->ihl); + struct redir *redir; + + if (iph->protocol != IPPROTO_TCP) + return; + + LOCK_BH(&redir_lock); + redir = find_unredir(iph->saddr, iph->daddr, tcph->source, tcph->dest); + if (redir) { + DEBUGP("Doing tcp unredirect.\n"); + do_tcp_unredir(skb, redir); + if (tcph->rst || tcph->fin) { + redir->destroyme.function = destroyme; + redir->destroyme.data = (unsigned long)redir; + mod_timer(&redir->destroyme, 75*HZ); + } + } + UNLOCK_BH(&redir_lock); +} diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c new file mode 100644 index 000000000000..996e5a7fffc8 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -0,0 +1,855 @@ +/* NAT for netfilter; shared with compatibility layer. */ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. 
*/ +#ifdef MODULE +#define __NO_VERSION__ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For tcp_prot in getorigdst */ + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_RWLOCK(ip_nat_lock); + +#define IP_NAT_HTABLE_SIZE 64 + +static struct list_head bysource[IP_NAT_HTABLE_SIZE]; +static struct list_head byipsproto[IP_NAT_HTABLE_SIZE]; +LIST_HEAD(protos); +static LIST_HEAD(helpers); + +extern struct ip_nat_protocol unknown_nat_protocol; + +/* We keep extra hashes for each conntrack, for fast searching. */ +static inline size_t +hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto) +{ + /* Modified src and dst, to ensure we don't create two + identical streams. */ + return (src + dst + proto) % IP_NAT_HTABLE_SIZE; +} + +static inline size_t +hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto) +{ + /* Original src, to ensure we map it consistently if poss. */ + return (manip->ip + manip->u.all + proto) % IP_NAT_HTABLE_SIZE; +} + +/* Noone using conntrack by the time this called. 
*/ +static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) +{ + struct ip_nat_info *info = &conn->nat.info; + + if (!info->initialized) + return; + + IP_NF_ASSERT(info->bysource.conntrack); + IP_NF_ASSERT(info->byipsproto.conntrack); + + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&bysource[hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conn->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum)], + &info->bysource); + + LIST_DELETE(&byipsproto + [hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conn->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conn->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum)], + &info->byipsproto); + WRITE_UNLOCK(&ip_nat_lock); +} + +/* We do checksum mangling, so if they were wrong before they're still + * wrong. Also works for incomplete packets (eg. ICMP dest + * unreachables.) */ +u_int16_t +ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) +{ + u_int32_t diffs[] = { oldvalinv, newval }; + return csum_fold(csum_partial((char *)diffs, sizeof(diffs), + oldcheck^0xFFFF)); +} + +static inline int cmp_proto(const struct ip_nat_protocol *i, int proto) +{ + return i->protonum == proto; +} + +struct ip_nat_protocol * +find_nat_proto(u_int16_t protonum) +{ + struct ip_nat_protocol *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum); + if (!i) + i = &unknown_nat_protocol; + return i; +} + +/* Is this tuple already taken? (not by us) */ +int +ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + /* Conntrack tracking doesn't keep track of outgoing tuples; only + incoming ones. NAT means they don't have a fixed mapping, + so we invert the tuple and look for the incoming reply. + + We could keep a separate hash if this proves too slow. 
*/ + struct ip_conntrack_tuple reply; + + invert_tuplepr(&reply, tuple); + return ip_conntrack_tuple_taken(&reply, ignored_conntrack); +} + +/* Does tuple + the source manip come within the range mr */ +static int +in_range(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_manip *manip, + const struct ip_nat_multi_range *mr) +{ + struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum); + unsigned int i; + struct ip_conntrack_tuple newtuple = { *manip, tuple->dst }; + + for (i = 0; i < mr->rangesize; i++) { + /* If we are allowed to map IPs, then we must be in the + range specified, otherwise we must be unchanged. */ + if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) { + if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip) + || (ntohl(newtuple.src.ip) + > ntohl(mr->range[i].max_ip))) + continue; + } else { + if (newtuple.src.ip != tuple->src.ip) + continue; + } + + if ((mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED) + && proto->in_range(&newtuple, IP_NAT_MANIP_SRC, + &mr->range[i].min, &mr->range[i].max)) + return 1; + } + return 0; +} + +static inline int +src_cmp(const struct ip_nat_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr) +{ + return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum + == tuple->dst.protonum + && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip + == tuple->src.ip + && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all + == tuple->src.u.all + && in_range(tuple, + &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + mr)); +} + +/* Only called for SRC manip */ +static struct ip_conntrack_manip * +find_appropriate_src(const struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr) +{ + unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum); + struct ip_nat_hash *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr); + if (i) + return 
&i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src; + else + return NULL; +} + +/* If it's really a local destination manip, it may need to do a + source manip too. */ +static int +do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp) +{ + struct rtable *rt; + + /* FIXME: IPTOS_TOS(iph->tos) --RR */ + if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) { + DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n", + IP_PARTS(var_ip)); + return 0; + } + + *other_ipp = rt->rt_src; + ip_rt_put(rt); + return 1; +} + +/* Simple way to iterate through all. */ +static inline int fake_cmp(const struct ip_nat_hash *i, + u_int32_t src, u_int32_t dst, u_int16_t protonum, + unsigned int *score, + const struct ip_conntrack *conntrack) +{ + /* Compare backwards: we're dealing with OUTGOING tuples, and + inside the conntrack is the REPLY tuple. Don't count this + conntrack. */ + if (i->conntrack != conntrack + && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst + && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src + && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum + == protonum)) + (*score)++; + return 0; +} + +static inline unsigned int +count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum, + const struct ip_conntrack *conntrack) +{ + unsigned int score = 0; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + LIST_FIND(&byipsproto[hash_by_ipsproto(src, dst, protonum)], + fake_cmp, struct ip_nat_hash *, src, dst, protonum, &score, + conntrack); + + return score; +} + +/* For [FUTURE] fragmentation handling, we want the least-used + src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus + if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports + 1-65535, we don't do pro-rata allocation based on ports; we choose + the ip with the lowest src-ip/dst-ip/proto usage. + + If an allocation then fails (eg. all 6 ports used in the 1.2.3.4 + range), we eliminate that and try again. 
This is not the most + efficient approach, but if you're worried about that, don't hand us + ranges you don't really have. */ +static struct ip_nat_range * +find_best_ips_proto(struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr, + const struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + unsigned int i; + struct { + const struct ip_nat_range *range; + unsigned int score; + struct ip_conntrack_tuple tuple; + } best = { NULL, 0xFFFFFFFF }; + u_int32_t *var_ipp, *other_ipp, saved_ip; + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) { + var_ipp = &tuple->src.ip; + saved_ip = tuple->dst.ip; + other_ipp = &tuple->dst.ip; + } else { + var_ipp = &tuple->dst.ip; + saved_ip = tuple->src.ip; + other_ipp = &tuple->src.ip; + } + + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) { + u_int32_t minip, maxip; + + /* Don't do ranges which are already eliminated. */ + if (mr->range[i].flags & IP_NAT_RANGE_FULL) { + continue; + } + + if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) { + minip = mr->range[i].min_ip; + maxip = mr->range[i].max_ip; + } else + minip = maxip = *var_ipp; + + for (*var_ipp = minip; + ntohl(*var_ipp) <= ntohl(maxip); + *var_ipp = htonl(ntohl(*var_ipp) + 1)) { + unsigned int score; + + /* Reset the other ip in case it was mangled by + * do_extra_mangle last time. */ + *other_ipp = saved_ip; + + if (hooknum == NF_IP_LOCAL_OUT + && !do_extra_mangle(*var_ipp, other_ipp)) { + DEBUGP("Range %u %u.%u.%u.%u rt failed!\n", + i, IP_PARTS(*var_ipp)); + /* Can't route? This whole range part is + * probably screwed, but keep trying + * anyway. */ + continue; + } + + /* Count how many others map onto this. */ + score = count_maps(tuple->src.ip, tuple->dst.ip, + tuple->dst.protonum, conntrack); + if (score < best.score) { + /* Optimization: doesn't get any better than + this. 
*/ + if (score == 0) + return (struct ip_nat_range *) + &mr->range[i]; + + best.score = score; + best.tuple = *tuple; + best.range = &mr->range[i]; + } + } + } + *tuple = best.tuple; + + /* Discard const. */ + return (struct ip_nat_range *)best.range; +} + +static int +get_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig_tuple, + const struct ip_nat_multi_range *mrr, + struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + struct ip_nat_protocol *proto + = find_nat_proto(orig_tuple->dst.protonum); + struct ip_nat_range *rptr; + unsigned int i; + int ret; + + /* We temporarily use flags for marking full parts, but we + always clean up afterwards */ + struct ip_nat_multi_range *mr = (void *)mrr; + + /* 1) If this srcip/proto/src-proto-part is currently mapped, + and that same mapping gives a unique tuple within the given + range, use that. + + This is only required for source (ie. NAT/masq) mappings. + So far, we don't do local source mappings, so multiple + manips not an issue. */ + if (hooknum == NF_IP_POST_ROUTING) { + struct ip_conntrack_manip *manip; + + manip = find_appropriate_src(orig_tuple, mr); + if (manip) { + /* Apply same source manipulation. */ + *tuple = ((struct ip_conntrack_tuple) + { *manip, orig_tuple->dst }); + DEBUGP("get_unique_tuple: Found current src map\n"); + return 1; + } + } + + /* 2) Select the least-used IP/proto combination in the given + range. + */ + *tuple = *orig_tuple; + while ((rptr = find_best_ips_proto(tuple, mr, conntrack, hooknum)) + != NULL) { + DEBUGP("Found best for "); DUMP_TUPLE(tuple); + /* 3) The per-protocol part of the manip is made to + map into the range to make a unique tuple. 
*/ + + /* Only bother mapping if it's not already in range + and unique */ + if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED) + || proto->in_range(tuple, HOOK2MANIP(hooknum), + &rptr->min, &rptr->max)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ret = 1; + goto clear_fulls; + } else { + if (proto->unique_tuple(tuple, rptr, + HOOK2MANIP(hooknum), + conntrack)) { + /* Must be unique. */ + IP_NF_ASSERT(!ip_nat_used_tuple(tuple, + conntrack)); + ret = 1; + goto clear_fulls; + } + DEBUGP("Protocol can't get unique tuple.\n"); + } + + /* Eliminate that from range, and try again. */ + rptr->flags |= IP_NAT_RANGE_FULL; + *tuple = *orig_tuple; + } + + ret = 0; + + clear_fulls: + /* Clear full flags. */ + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) + mr->range[i].flags &= ~IP_NAT_RANGE_FULL; + + return ret; +} + +static inline int +helper_cmp(const struct ip_nat_helper *helper, + u_int16_t protocol, + u_int16_t protocol_dst) +{ + return (protocol == helper->protocol + && protocol_dst == helper->protocol_dst); +} + +/* Where to manip the reply packets (will be reverse manip). */ +static unsigned int opposite_hook[NF_IP_NUMHOOKS] += { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, + [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING, + [NF_IP_LOCAL_OUT] = NF_IP_PRE_ROUTING +}; + +unsigned int +ip_nat_setup_info(struct ip_conntrack *conntrack, + const struct ip_nat_multi_range *mr, + unsigned int hooknum) +{ + struct ip_conntrack_tuple new_tuple, inv_tuple, reply; + struct ip_conntrack_tuple orig_tp; + struct ip_nat_info *info = &conntrack->nat.info; + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING + || hooknum == NF_IP_POST_ROUTING + || hooknum == NF_IP_LOCAL_OUT); + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* What we've got will look like inverse of reply. 
Normally + this is what is in the conntrack, except for prior + manipulations (future optimization: if num_manips == 0, + orig_tp = + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ + invert_tuplepr(&orig_tp, + &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple); + +#if 0 + { + unsigned int i; + + DEBUGP("Hook %u (%s), ", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST"); + DUMP_TUPLE(&orig_tp); + DEBUGP("Range %p: ", mr); + for (i = 0; i < mr->rangesize; i++) { + DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n", + i, + (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) + ? " MAP_IPS" : "", + (mr->range[i].flags + & IP_NAT_RANGE_PROTO_SPECIFIED) + ? " PROTO_SPECIFIED" : "", + (mr->range[i].flags & IP_NAT_RANGE_FULL) + ? " FULL" : "", + IP_PARTS(mr->range[i].min_ip), + IP_PARTS(mr->range[i].max_ip), + mr->range[i].min.all, + mr->range[i].max.all); + } + } +#endif + + do { + if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack, + hooknum)) { + DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n", + conntrack); + return NF_DROP; + } + +#if 0 + DEBUGP("Hook %u (%s) %p\n", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST", + conntrack); + DEBUGP("Original: "); + DUMP_TUPLE(&orig_tp); + DEBUGP("New: "); + DUMP_TUPLE(&new_tuple); +#endif + + /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT): + the original (A/B/C/D') and the mangled one (E/F/G/H'). + + We're only allowed to work with the SRC per-proto + part, so we create inverses of both to start, then + derive the other fields we need. */ + + /* Reply connection: simply invert the new tuple + (G/H/E/F') */ + invert_tuplepr(&reply, &new_tuple); + + /* Alter conntrack table so it recognizes replies. + If fail this race (reply tuple now used), repeat. 
*/ + } while (!ip_conntrack_alter_reply(conntrack, &reply)); + + /* FIXME: We can simply used existing conntrack reply tuple + here --RR */ + /* Create inverse of original: C/D/A/B' */ + invert_tuplepr(&inv_tuple, &orig_tp); + + /* Has source changed?. */ + if (memcmp(&new_tuple.src, &orig_tp.src, sizeof(new_tuple.src)) + != 0) { + /* In this direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_SRC, new_tuple.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a destination manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_DST, orig_tp.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* Has destination changed? */ + if (memcmp(&new_tuple.dst, &orig_tp.dst, sizeof(new_tuple.dst)) + != 0) { + /* In this direction, a destination manip */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_DST, reply.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_SRC, inv_tuple.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* If there's a helper, assign it; based on new tuple. */ + info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, + new_tuple.dst.protonum, + new_tuple.dst.u.all); + + /* It's done. */ + info->initialized |= (1 << HOOK2MANIP(hooknum)); + return NF_ACCEPT; +} + +void replace_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + /* Source has changed, so replace in hashes. 
*/ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOUNG in byips_proto hash + (ie. reverse dst and src of reply packet. */ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(info->bysource.conntrack == conntrack); + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + + list_del(&info->bysource.list); + list_del(&info->byipsproto.list); + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +void place_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOUNG in byips_proto hash + (ie. reverse dst and src of reply packet. 
*/ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(!info->bysource.conntrack); + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + info->byipsproto.conntrack = conntrack; + info->bysource.conntrack = conntrack; + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +static void +manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype); + + if (maniptype == IP_NAT_MANIP_SRC) { + iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip, + iph->check); + iph->saddr = manip->ip; + } else { + iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip, + iph->check); + iph->daddr = manip->ip; + } +#if 0 + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + DEBUGP("IP: checksum on packet bad.\n"); + + if (proto == IPPROTO_TCP) { + void *th = (u_int32_t *)iph + iph->ihl; + if (tcp_v4_check(th, len - 4*iph->ihl, iph->saddr, iph->daddr, + csum_partial((char *)th, len-4*iph->ihl, 0))) + DEBUGP("TCP: checksum on packet bad\n"); + } +#endif +} + +/* Do packet manipulations according to binding. */ +unsigned int +do_bindings(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct ip_nat_info *info, + unsigned int hooknum, + struct sk_buff **pskb) +{ + unsigned int i; + struct ip_nat_helper *helper; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + /* Need nat lock to protect against modification, but neither + conntrack (referenced) and helper (deleted with + synchronize_bh()) can vanish. 
*/ + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + if (info->manips[i].direction == dir + && info->manips[i].hooknum == hooknum) { + DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n", + *pskb, + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", + IP_PARTS(info->manips[i].manip.ip), + htons(info->manips[i].manip.u.all)); + manip_pkt((*pskb)->nh.iph->protocol, + (*pskb)->nh.iph, + (*pskb)->len, + &info->manips[i].manip, + info->manips[i].maniptype); + } + } + helper = info->helper; + READ_UNLOCK(&ip_nat_lock); + + if (helper) { + /* Always defragged for helpers */ + IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off + & __constant_htons(IP_MF|IP_OFFSET))); + return helper->help(ct, info, ctinfo, hooknum, pskb); + } else return NF_ACCEPT; +} + +void +icmp_reply_translation(struct sk_buff *skb, + struct ip_conntrack *conntrack, + unsigned int hooknum, + int dir) +{ + struct iphdr *iph = skb->nh.iph; + struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + struct iphdr *inner = (struct iphdr *)(hdr + 1); + size_t datalen = skb->len - ((void *)inner - (void *)iph); + unsigned int i; + struct ip_nat_info *info = &conntrack->nat.info; + + IP_NF_ASSERT(skb->len >= iph->ihl*4 + sizeof(struct icmphdr)); + + DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n", + skb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); + /* Note: May not be from a NAT'd host, but probably safest to + do translation always as if it came from the host itself + (even though a "host unreachable" coming from the host + itself is a bit wierd). + + More explanation: some people use NAT for anonomizing. 
+ Also, CERT recommends dropping all packets from private IP + addresses (although ICMP errors from internal links with + such addresses are not too uncommon, as Alan Cox points + out) */ + + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + DEBUGP("icmp_reply: manip %u dir %s hook %u\n", + i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ? + "ORIG" : "REPLY", info->manips[i].hooknum); + /* Mapping the inner packet is just like a normal + packet in the other direction, except it was never + src/dst reversed, so where we would normally apply + a dst manip, we reply a src, and vice versa. */ + if (info->manips[i].direction != dir + && info->manips[i].hooknum == opposite_hook[hooknum]) { + DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "DST" : "SRC", + IP_PARTS(info->manips[i].manip.ip), + ntohs(info->manips[i].manip.u.udp.port)); + manip_pkt(inner->protocol, inner, + skb->len - ((void *)inner - (void *)iph), + &info->manips[i].manip, + !info->manips[i].maniptype); + } + /* Outer packet needs to have IP header NATed like + it's a reply. */ + else if (info->manips[i].direction != dir + && info->manips[i].hooknum == hooknum) { + /* Use mapping to map outer packet: 0 give no + per-proto mapping */ + DEBUGP("icmp_reply: outer %s %u.%u.%u.%u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", + IP_PARTS(info->manips[i].manip.ip)); + manip_pkt(0, iph, skb->len, + &info->manips[i].manip, + info->manips[i].maniptype); + } + } + READ_UNLOCK(&ip_nat_lock); + + /* Since we mangled inside ICMP packet, recalculate its + checksum from scratch. (Hence the handling of incorrect + checksums in conntrack, so we don't accidentally fix one.) 
*/ + hdr->checksum = 0; + hdr->checksum = ip_compute_csum((unsigned char *)hdr, + sizeof(*hdr) + datalen); +} + +int ip_nat_helper_register(struct ip_nat_helper *me) +{ + int ret = 0; + + WRITE_LOCK(&ip_nat_lock); + if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, + me->protocol, me->protocol_dst)) + ret = -EBUSY; + else { + list_prepend(&helpers, me); + MOD_INC_USE_COUNT; + } + WRITE_UNLOCK(&ip_nat_lock); + + return ret; +} + +static int +kill_helper(const struct ip_conntrack *i, void *helper) +{ + int ret; + + READ_LOCK(&ip_nat_lock); + ret = (i->nat.info.helper == helper); + READ_UNLOCK(&ip_nat_lock); + + return ret; +} + +void ip_nat_helper_unregister(struct ip_nat_helper *me) +{ + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&helpers, me); + WRITE_UNLOCK(&ip_nat_lock); + + /* Someone could be still looking at the helper in a bh. */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + /* Find anything using it, and umm, kill them. We can't turn + them into normal connections: if we've adjusted SYNs, then + they'll ackstorm. So we just drop it. We used to just + bump module count when a connection existed, but that + forces admins to gen fake RSTs or bounce box, either of + which is just a long-winded way of making things + worse. --RR */ + ip_ct_selective_cleanup(kill_helper, me); + + MOD_DEC_USE_COUNT; +} + +int __init ip_nat_init(void) +{ + size_t i; + + /* Sew in builtin protocols. */ + WRITE_LOCK(&ip_nat_lock); + list_append(&protos, &ip_nat_protocol_tcp); + list_append(&protos, &ip_nat_protocol_udp); + list_append(&protos, &ip_nat_protocol_icmp); + WRITE_UNLOCK(&ip_nat_lock); + + for (i = 0; i < IP_NAT_HTABLE_SIZE; i++) { + INIT_LIST_HEAD(&bysource[i]); + INIT_LIST_HEAD(&byipsproto[i]); + } + + /* FIXME: Man, this is a hack. 
*/ + IP_NF_ASSERT(ip_conntrack_destroyed == NULL); + ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; + + return 0; +} + +void ip_nat_cleanup(void) +{ + ip_conntrack_destroyed = NULL; +} diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c new file mode 100644 index 000000000000..8252e6d9b1c6 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -0,0 +1,403 @@ +/* FTP extension for TCP NAT alteration. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* FIXME: Time out? --RR */ + +static int +ftp_nat_expected(struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info, + struct ip_conntrack *master, + struct ip_nat_info *masterinfo, + unsigned int *verdict) +{ + struct ip_nat_multi_range mr; + u_int32_t newdstip, newsrcip, newip; + struct ip_ct_ftp *ftpinfo; + + IP_NF_ASSERT(info); + IP_NF_ASSERT(master); + IP_NF_ASSERT(masterinfo); + + IP_NF_ASSERT(!(info->initialized & (1<help.ct_ftp_info; + + LOCK_BH(&ip_ftp_lock); + if (!ftpinfo->is_ftp) { + UNLOCK_BH(&ip_ftp_lock); + DEBUGP("nat_expected: master not ftp\n"); + return 0; + } + + if (ftpinfo->ftptype == IP_CT_FTP_PORT) { + /* PORT command: make connection go to the client. */ + newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + DEBUGP("nat_expected: PORT cmd. %u.%u.%u.%u->%u.%u.%u.%u\n", + IP_PARTS(newsrcip), IP_PARTS(newdstip)); + } else { + /* PASV command: make the connection go to the server */ + newdstip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + DEBUGP("nat_expected: PASV cmd. 
%u.%u.%u.%u->%u.%u.%u.%u\n", + IP_PARTS(newsrcip), IP_PARTS(newdstip)); + } + UNLOCK_BH(&ip_ftp_lock); + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) + newip = newsrcip; + else + newip = newdstip; + + DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", IP_PARTS(newip)); + + mr.rangesize = 1; + /* We don't want to manip the per-protocol, just the IPs. */ + mr.range[0].flags = IP_NAT_RANGE_MAP_IPS; + mr.range[0].min_ip = mr.range[0].max_ip = newip; + + *verdict = ip_nat_setup_info(ct, &mr, hooknum); + + return 1; +} + +/* This is interesting. We simply use the port given us by the client + or server. In practice it's extremely unlikely to clash; if it + does, the rule won't be able to get a unique tuple and will drop + the packets. */ +static int +mangle_packet(struct sk_buff **pskb, + u_int32_t newip, + u_int16_t port, + unsigned int matchoff, + unsigned int matchlen, + struct ip_nat_ftp_info *this_way, + struct ip_nat_ftp_info *other_way) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph; + unsigned char *data; + unsigned int tcplen, newlen, newtcplen; + char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; + + MUST_BE_LOCKED(&ip_ftp_lock); + sprintf(buffer, "%u,%u,%u,%u,%u,%u", + IP_PARTS(newip), port>>8, port&0xFF); + + tcplen = (*pskb)->len - iph->ihl * 4; + newtcplen = tcplen - matchlen + strlen(buffer); + newlen = iph->ihl*4 + newtcplen; + + /* So there I am, in the middle of my `netfilter-is-wonderful' + talk in Sydney, and someone asks `What happens if you try + to enlarge a 64k packet here?'. I think I said something + eloquent like `fuck'. 
*/ + if (newlen > 65535) { + if (net_ratelimit()) + printk("nat_ftp cheat: %u.%u.%u.%u->%u.%u.%u.%u %u\n", + NIPQUAD((*pskb)->nh.iph->saddr), + NIPQUAD((*pskb)->nh.iph->daddr), + (*pskb)->nh.iph->protocol); + return NF_DROP; + } + + if (newlen > (*pskb)->len + skb_tailroom(*pskb)) { + struct sk_buff *newskb; + newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), newlen, + GFP_ATOMIC); + if (!newskb) { + DEBUGP("ftp: oom\n"); + return 0; + } else { + kfree_skb(*pskb); + *pskb = newskb; + iph = (*pskb)->nh.iph; + } + } + + tcph = (void *)iph + iph->ihl*4; + data = (void *)tcph + tcph->doff*4; + + DEBUGP("Mapping `%.*s' [%u %u %u] to new `%s' [%u]\n", + (int)matchlen, data+matchoff, + data[matchoff], data[matchoff+1], + matchlen, buffer, strlen(buffer)); + + /* SYN adjust. If it's uninitialized, or this is after last + correction, record it: we don't handle more than one + adjustment in the window, but do deal with common case of a + retransmit. */ + if (this_way->syn_offset_before == this_way->syn_offset_after + || before(this_way->syn_correction_pos, ntohl(tcph->seq))) { + this_way->syn_correction_pos = ntohl(tcph->seq); + this_way->syn_offset_before = this_way->syn_offset_after; + this_way->syn_offset_after = (int32_t) + this_way->syn_offset_before + newlen - (*pskb)->len; + } + + /* Move post-replacement */ + memmove(data + matchoff + strlen(buffer), + data + matchoff + matchlen, + (*pskb)->tail - (data + matchoff + matchlen)); + memcpy(data + matchoff, buffer, strlen(buffer)); + + /* Resize packet. 
*/ + if (newlen > (*pskb)->len) { + DEBUGP("ip_nat_ftp: Extending packet by %u to %u bytes\n", + newlen - (*pskb)->len, newlen); + skb_put(*pskb, newlen - (*pskb)->len); + } else { + DEBUGP("ip_nat_ftp: Shrinking packet from %u to %u bytes\n", + (*pskb)->len, newlen); + skb_trim(*pskb, newlen); + } + + /* Fix checksums */ + iph->tot_len = htons(newlen); + (*pskb)->csum = csum_partial((char *)tcph + tcph->doff*4, + newtcplen - tcph->doff*4, 0); + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, newtcplen, iph->saddr, iph->daddr, + csum_partial((char *)tcph, tcph->doff*4, + (*pskb)->csum)); + ip_send_check(iph); + return 1; +} + +/* Grrr... SACK. Fuck me even harder. Don't want to fix it on the + fly, so blow it away. */ +static void +delete_sack(struct sk_buff *skb, struct tcphdr *tcph) +{ + unsigned int i; + u_int8_t *opt = (u_int8_t *)tcph; + + DEBUGP("Seeking SACKPERM in SYN packet (doff = %u).\n", + tcph->doff * 4); + for (i = sizeof(struct tcphdr); i < tcph->doff * 4;) { + DEBUGP("%u ", opt[i]); + switch (opt[i]) { + case TCPOPT_NOP: + case TCPOPT_EOL: + i++; + break; + + case TCPOPT_SACK_PERM: + goto found_opt; + + default: + /* Worst that can happen: it will take us over. */ + i += opt[i+1] ?: 1; + } + } + DEBUGP("\n"); + return; + + found_opt: + DEBUGP("\n"); + DEBUGP("Found SACKPERM at offset %u.\n", i); + + /* Must be within TCP header, and valid SACK perm. */ + if (i + opt[i+1] <= tcph->doff*4 && opt[i+1] == 2) { + /* Replace with NOPs. 
*/ + tcph->check + = ip_nat_cheat_check(*((u_int16_t *)(opt + i))^0xFFFF, + 0, tcph->check); + opt[i] = opt[i+1] = 0; + } + else DEBUGP("Something wrong with SACK_PERM.\n"); +} + +static int ftp_data_fixup(const struct ip_ct_ftp *ct_ftp_info, + struct ip_conntrack *ct, + struct ip_nat_ftp_info *ftp, + unsigned int datalen, + struct sk_buff **pskb) +{ + u_int32_t newip; + struct ip_conntrack_tuple t; + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *)iph + iph->ihl*4; + + MUST_BE_LOCKED(&ip_ftp_lock); + DEBUGP("FTP_NAT: seq %u + %u in %u + %u\n", + ct_ftp_info->seq, ct_ftp_info->len, + ntohl(tcph->seq), datalen); + + /* Change address inside packet to match way we're mapping + this connection. */ + if (ct_ftp_info->ftptype == IP_CT_FTP_PASV) { + /* PASV response: must be where client thinks server + is */ + newip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + } else { + /* PORT command: must be where server thinks client is */ + newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + } + + if (!mangle_packet(pskb, newip, ct_ftp_info->port, + ct_ftp_info->seq - ntohl(tcph->seq), + ct_ftp_info->len, + &ftp[ct_ftp_info->ftptype], + &ftp[!ct_ftp_info->ftptype])) + return 0; + + /* Alter conntrack's expectations. */ + + /* We can read expect here without conntrack lock, since it's + only set in ip_conntrack_ftp, with ip_ftp_lock held + writable */ + t = ct->expected.tuple; + t.dst.ip = newip; + ip_conntrack_expect_related(ct, &t); + + return 1; +} + +static unsigned int help(struct ip_conntrack *ct, + struct ip_nat_info *info, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *)iph + iph->ihl*4; + u_int32_t newseq, newack; + unsigned int datalen; + int dir; + int score; + struct ip_ct_ftp *ct_ftp_info + = &ct->help.ct_ftp_info; + struct ip_nat_ftp_info *ftp + = &ct->nat.help.ftp_info[0]; + + /* Delete SACK_OK on initial TCP SYNs. 
*/ + if (tcph->syn && !tcph->ack) + delete_sack(*pskb, tcph); + + /* Only mangle things once: original direction in POST_ROUTING + and reply direction on PRE_ROUTING. */ + dir = CTINFO2DIR(ctinfo); + if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) + || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) { + DEBUGP("nat_ftp: Not touching dir %s at hook %s\n", + dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", + hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" + : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" + : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "???"); + return NF_ACCEPT; + } + + datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4; + score = 0; + LOCK_BH(&ip_ftp_lock); + if (ct_ftp_info->len) { + /* If it's in the right range... */ + score += between(ct_ftp_info->seq, ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + score += between(ct_ftp_info->seq + ct_ftp_info->len, + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + if (score == 1) { + /* Half a match? This means a partial retransmisison. + It's a cracker being funky. 
*/ + if (net_ratelimit()) { + printk("FTP_NAT: partial packet %u/%u in %u/%u\n", + ct_ftp_info->seq, ct_ftp_info->len, + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + } + UNLOCK_BH(&ip_ftp_lock); + return NF_DROP; + } else if (score == 2) { + if (!ftp_data_fixup(ct_ftp_info, ct, ftp, datalen, + pskb)) { + UNLOCK_BH(&ip_ftp_lock); + return NF_DROP; + } + + /* skb may have been reallocated */ + iph = (*pskb)->nh.iph; + tcph = (void *)iph + iph->ihl*4; + } + } + + /* Sequence adjust */ + if (after(ntohl(tcph->seq), ftp[dir].syn_correction_pos)) + newseq = ntohl(tcph->seq) + ftp[dir].syn_offset_after; + else + newseq = ntohl(tcph->seq) + ftp[dir].syn_offset_before; + newseq = htonl(newseq); + + /* Ack adjust */ + if (after(ntohl(tcph->ack_seq), ftp[!dir].syn_correction_pos)) + newack = ntohl(tcph->ack_seq) - ftp[!dir].syn_offset_after; + else + newack = ntohl(tcph->ack_seq) - ftp[!dir].syn_offset_before; + newack = htonl(newack); + UNLOCK_BH(&ip_ftp_lock); + + tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, + ip_nat_cheat_check(~tcph->ack_seq, + newack, + tcph->check)); + tcph->seq = newseq; + tcph->ack_seq = newack; + + return NF_ACCEPT; +} + +static struct ip_nat_helper ftp += { { NULL, NULL }, IPPROTO_TCP, __constant_htons(21), help, "ftp" }; +static struct ip_nat_expect ftp_expect += { { NULL, NULL }, ftp_nat_expected }; + +extern struct module *ip_conntrack_ftp; + +static int __init init(void) +{ + int ret; + + ret = ip_nat_expect_register(&ftp_expect); + if (ret == 0) { + ret = ip_nat_helper_register(&ftp); + + if (ret == 0) + __MOD_INC_USE_COUNT(ip_conntrack_ftp); + else + ip_nat_expect_unregister(&ftp_expect); + } + return ret; +} + +static void __exit fini(void) +{ + __MOD_DEC_USE_COUNT(ip_conntrack_ftp); + ip_nat_helper_unregister(&ftp); + ip_nat_expect_unregister(&ftp_expect); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c new file mode 100644 index 
000000000000..9bc7427ce53f --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +icmp_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + return (tuple->src.u.icmp.id >= min->icmp.id + && tuple->src.u.icmp.id <= max->icmp.id); +} + +static int +icmp_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t id = 0; + unsigned int range_size + = (unsigned int)range->max.icmp.id - range->min.icmp.id + 1; + unsigned int i; + + /* If no range specified... */ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xFFFF; + + for (i = 0; i < range_size; i++, id++) { + tuple->src.u.icmp.id = range->min.icmp.id + (id % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + return 0; +} + +static void +icmp_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); + + hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, + manip->u.icmp.id, + hdr->checksum); + hdr->un.echo.id = manip->u.icmp.id; +} + +static unsigned int +icmp_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.icmp.id) + len += sprintf(buffer + len, "id=%u ", + ntohs(match->src.u.icmp.id)); + + if (mask->dst.u.icmp.type) + len += sprintf(buffer + len, "type=%u ", + ntohs(match->dst.u.icmp.type)); + + if (mask->dst.u.icmp.code) + len += sprintf(buffer + len, "code=%u ", + ntohs(match->dst.u.icmp.code)); + + return len; +} + +static unsigned int 
+icmp_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF) + return sprintf(buffer, "id %u-%u ", + ntohs(range->min.icmp.id), + ntohs(range->max.icmp.id)); + else return 0; +} + +struct ip_nat_protocol ip_nat_protocol_icmp += { { NULL, NULL }, "ICMP", IPPROTO_ICMP, + icmp_manip_pkt, + icmp_in_range, + icmp_unique_tuple, + icmp_print, + icmp_print_range +}; diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c new file mode 100644 index 000000000000..7ff6ccb50b37 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -0,0 +1,143 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +tcp_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int16_t port; + + if (maniptype == IP_NAT_MANIP_SRC) + port = tuple->src.u.tcp.port; + else + port = tuple->dst.u.tcp.port; + + return ntohs(port) >= ntohs(min->tcp.port) + && ntohs(port) <= ntohs(max->tcp.port); +} + +static int +tcp_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t port = 0, *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) + portptr = &tuple->src.u.tcp.port; + else + portptr = &tuple->dst.u.tcp.port; + + /* If no range specified... */ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == IP_NAT_MANIP_DST) + return 0; + + /* Map privileged onto privileged. 
*/ + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr)<512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min.tcp.port); + range_size = ntohs(range->max.tcp.port) - min + 1; + } + + for (i = 0; i < range_size; i++, port++) { + *portptr = htons(min + port % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) { + return 1; + } + } + return 0; +} + +static void +tcp_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct tcphdr *hdr = (struct tcphdr *)((u_int32_t *)iph + iph->ihl); + u_int32_t oldip; + u_int16_t *portptr; + + if (maniptype == IP_NAT_MANIP_SRC) { + /* Get rid of src ip and src pt */ + oldip = iph->saddr; + portptr = &hdr->source; + } else { + /* Get rid of dst ip and dst pt */ + oldip = iph->daddr; + portptr = &hdr->dest; + } + hdr->check = ip_nat_cheat_check(~oldip, manip->ip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + manip->u.tcp.port, + hdr->check)); + *portptr = manip->u.tcp.port; +} + +static unsigned int +tcp_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.tcp.port) + len += sprintf(buffer + len, "srcpt=%u ", + ntohs(match->src.u.tcp.port)); + + + if (mask->dst.u.tcp.port) + len += sprintf(buffer + len, "dstpt=%u ", + ntohs(match->dst.u.tcp.port)); + + return len; +} + +static unsigned int +tcp_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) { + if (range->min.tcp.port == range->max.tcp.port) + return sprintf(buffer, "port %u ", + ntohs(range->min.tcp.port)); + else + return sprintf(buffer, "ports %u-%u ", + ntohs(range->min.tcp.port), + ntohs(range->max.tcp.port)); + } + else return 0; 
+} + +struct ip_nat_protocol ip_nat_protocol_tcp += { { NULL, NULL }, "TCP", IPPROTO_TCP, + tcp_manip_pkt, + tcp_in_range, + tcp_unique_tuple, + tcp_print, + tcp_print_range +}; diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c new file mode 100644 index 000000000000..e0dc25910a2c --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +udp_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int16_t port; + + if (maniptype == IP_NAT_MANIP_SRC) + port = tuple->src.u.udp.port; + else + port = tuple->dst.u.udp.port; + + return ntohs(port) >= ntohs(min->udp.port) + && ntohs(port) <= ntohs(max->udp.port); +} + +static int +udp_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t port = 0, *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) + portptr = &tuple->src.u.udp.port; + else + portptr = &tuple->dst.u.udp.port; + + /* If no range specified... 
*/ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == IP_NAT_MANIP_DST) + return 0; + + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr)<512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min.udp.port); + range_size = ntohs(range->max.udp.port) - min + 1; + } + + for (i = 0; i < range_size; i++, port++) { + *portptr = htons(min + port % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + return 0; +} + +static void +udp_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct udphdr *hdr = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + u_int32_t oldip; + u_int16_t *portptr; + + if (maniptype == IP_NAT_MANIP_SRC) { + /* Get rid of src ip and src pt */ + oldip = iph->saddr; + portptr = &hdr->source; + } else { + /* Get rid of dst ip and dst pt */ + oldip = iph->daddr; + portptr = &hdr->dest; + } + hdr->check = ip_nat_cheat_check(~oldip, manip->ip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + manip->u.udp.port, + hdr->check)); + *portptr = manip->u.udp.port; +} + +static unsigned int +udp_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.udp.port) + len += sprintf(buffer + len, "srcpt=%u ", + ntohs(match->src.u.udp.port)); + + + if (mask->dst.u.udp.port) + len += sprintf(buffer + len, "dstpt=%u ", + ntohs(match->dst.u.udp.port)); + + return len; +} + +static unsigned int +udp_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) { + if (range->min.udp.port == range->max.udp.port) + return sprintf(buffer, "port %u ", + 
ntohs(range->min.udp.port)); + else + return sprintf(buffer, "ports %u-%u ", + ntohs(range->min.udp.port), + ntohs(range->max.udp.port)); + } + else return 0; +} + +struct ip_nat_protocol ip_nat_protocol_udp += { { NULL, NULL }, "UDP", IPPROTO_UDP, + udp_manip_pkt, + udp_in_range, + udp_unique_tuple, + udp_print, + udp_print_range +}; diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c new file mode 100644 index 000000000000..0e39070368f9 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c @@ -0,0 +1,61 @@ +/* The "unknown" protocol. This is what is used for protocols we + * don't understand. It's returned by find_proto(). + */ + +#include +#include +#include +#include + +#include +#include +#include + +static int unknown_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type manip_type, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + return 1; +} + +static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + /* Sorry: we can't help you; if it's not unique, we can't frob + anything. 
*/ + return 0; +} + +static void +unknown_manip_pkt(struct iphdr *iph, size_t len, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + return; +} + +static unsigned int +unknown_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + return 0; +} + +static unsigned int +unknown_print_range(char *buffer, const struct ip_nat_range *range) +{ + return 0; +} + +struct ip_nat_protocol unknown_nat_protocol = { + { NULL, NULL }, "unknown", 0, + unknown_manip_pkt, + unknown_in_range, + unknown_unique_tuple, + unknown_print, + unknown_print_range +}; diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c new file mode 100644 index 000000000000..9246f23c0f9a --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -0,0 +1,329 @@ +/* Everything about the rules for NAT. */ +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) + +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +#define NAT_VALID_HOOKS ((1<rangesize - 1))))) { + DEBUGP("SNAT: Target size %u wrong for %u ranges\n", + targinfosize, mr->rangesize); + return 0; + } + + if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) { + DEBUGP("SNAT: hook mask 0x%x bad\n", hook_mask); + return 0; + } + return 1; +} + +static int ipt_dnat_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ip_nat_multi_range *mr = targinfo; + + /* Must be a valid range */ + if (targinfosize < sizeof(struct ip_nat_multi_range)) { + DEBUGP("DNAT: Target size %u too small\n", targinfosize); + return 0; + } + + if (targinfosize != IPT_ALIGN((sizeof(struct ip_nat_multi_range) + + (sizeof(struct ip_nat_range) + * (mr->rangesize - 1))))) { + DEBUGP("DNAT: Target size %u wrong for %u ranges\n", + targinfosize, mr->rangesize); + return 0; + } + + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) { + DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask); + return 0; + } + return 1; +} + +static inline unsigned int +alloc_null_binding(struct ip_conntrack *conntrack, + struct ip_nat_info *info, + unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + Use reply in case it's already been mangled (eg local packet). + */ + u_int32_t ip + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? 
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip + : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); + struct ip_nat_multi_range mr + = { 1, { { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } } } }; + + DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack, + IP_PARTS(ip)); + return ip_nat_setup_info(conntrack, &mr, hooknum); +} + +static inline int call_expect(const struct ip_nat_expect *i, + struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info, + struct ip_conntrack *master, + struct ip_nat_info *masterinfo, + unsigned int *verdict) +{ + return i->expect(pskb, hooknum, ct, info, master, masterinfo, + verdict); +} + +int ip_nat_rule_find(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct ip_conntrack *ct, + struct ip_nat_info *info) +{ + int ret; + + /* Master won't vanish while this ctrack still alive */ + if (ct->master.master) { + struct ip_conntrack *master; + + master = (struct ip_conntrack *)ct->master.master; + if (LIST_FIND(&nat_expect_list, + call_expect, + struct ip_nat_expect *, + pskb, hooknum, ct, info, + master, &master->nat.info, &ret)) + return ret; + } + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); + if (ret == NF_ACCEPT) { + if (!(info->initialized & (1 << HOOK2MANIP(hooknum)))) + /* NUL mapping */ + ret = alloc_null_binding(ct, info, hooknum); + } + return ret; +} + +int ip_nat_expect_register(struct ip_nat_expect *expect) +{ + WRITE_LOCK(&ip_nat_lock); + list_prepend(&nat_expect_list, expect); + WRITE_UNLOCK(&ip_nat_lock); + + return 0; +} + +void ip_nat_expect_unregister(struct ip_nat_expect *expect) +{ + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&nat_expect_list, expect); + WRITE_UNLOCK(&ip_nat_lock); +} + +static struct ipt_target ipt_snat_reg += { { NULL, NULL }, "SNAT", ipt_snat_target, ipt_snat_checkentry, NULL }; +static struct ipt_target ipt_dnat_reg += { { NULL, NULL }, "DNAT", 
ipt_dnat_target, ipt_dnat_checkentry, NULL }; + +int __init ip_nat_rule_init(void) +{ + int ret; + + ret = ipt_register_table(&nat_table); + if (ret != 0) + return ret; + ret = ipt_register_target(&ipt_snat_reg); + if (ret != 0) + goto unregister_table; + + ret = ipt_register_target(&ipt_dnat_reg); + if (ret != 0) + goto unregister_snat; + + return ret; + + unregister_snat: + ipt_unregister_target(&ipt_snat_reg); + unregister_table: + ipt_unregister_table(&nat_table); + + return ret; +} + +void ip_nat_rule_cleanup(void) +{ + ipt_unregister_target(&ipt_dnat_reg); + ipt_unregister_target(&ipt_snat_reg); + ipt_unregister_table(&nat_table); +} diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c new file mode 100644 index 000000000000..d6e8258add2e --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -0,0 +1,274 @@ +/* This file contains all the functions required for the standalone + ip_nat module. + + These are not required by the compatibility layer. +*/ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ + +#ifdef MODULE +#define EXPORT_SYMTAB +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) + +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \ + : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \ + : ((hooknum) == NF_IP_LOCAL_OUT ? 
"LOCAL_OUT" \ + : "*ERROR*"))) + +static unsigned int +ip_nat_fn(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_nat_info *info; + /* maniptype == SRC for postrouting. */ + enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); + + /* We never see fragments: conntrack defrags on pre-routing + and local-out, and ip_nat_out protects post-routing. */ + IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off + & __constant_htons(IP_MF|IP_OFFSET))); + + /* FIXME: One day, fill in properly. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; + + /* If we had a hardware checksum before, it's now invalid */ + if ((*pskb)->pkt_type != PACKET_LOOPBACK) + (*pskb)->ip_summed = CHECKSUM_NONE; + + ct = ip_conntrack_get(*pskb, &ctinfo); + /* Can't track? Maybe out of memory: this would make NAT + unreliable. */ + if (!ct) + return NF_DROP; + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED+IP_CT_IS_REPLY: + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { + icmp_reply_translation(*pskb, ct, hooknum, + CTINFO2DIR(ctinfo)); + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + info = &ct->nat.info; + + WRITE_LOCK(&ip_nat_lock); + /* Seen it before? This can happen for loopback, retrans, + or local packets.. */ + if (!(info->initialized & (1 << maniptype))) { + int in_hashes = info->initialized; + unsigned int ret; + + ret = ip_nat_rule_find(pskb, hooknum, in, out, + ct, info); + if (ret != NF_ACCEPT) { + WRITE_UNLOCK(&ip_nat_lock); + return ret; + } + + if (in_hashes) { + IP_NF_ASSERT(info->bysource.conntrack); + replace_in_hashes(ct, info); + } else { + place_in_hashes(ct, info); + } + } else + DEBUGP("Already setup manip %s for ct %p\n", + maniptype == IP_NAT_MANIP_SRC ? 
"SRC" : "DST", + ct); + WRITE_UNLOCK(&ip_nat_lock); + break; + + default: + /* ESTABLISHED */ + IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED + || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + info = &ct->nat.info; + } + + IP_NF_ASSERT(info); + return do_bindings(ct, ctinfo, info, hooknum, pskb); +} + +static unsigned int +ip_nat_out(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* We can hit fragment here; forwarded packets get + defragmented by connection tracking coming in, then + fragmented (grr) by the forward code. + + In future: If we have nfct != NULL, AND we have NAT + initialized, AND there is no helper, then we can do full + NAPT on the head, and IP-address-only NAT on the rest. + + I'm starting to have nightmares about fragments. */ + + if ((*pskb)->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + + if (!*pskb) + return NF_STOLEN; + } + + return ip_nat_fn(hooknum, pskb, in, out, okfn); +} + +/* We must be after connection tracking and before packet filtering. */ + +/* Before packet filtering, change destination */ +static struct nf_hook_ops ip_nat_in_ops += { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST }; +/* After packet filtering, change source */ +static struct nf_hook_ops ip_nat_out_ops += { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC}; +/* Before packet filtering, change destination */ +static struct nf_hook_ops ip_nat_local_out_ops += { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_NAT_DST }; + +/* Protocol registration. 
*/ +int ip_nat_protocol_register(struct ip_nat_protocol *proto) +{ + int ret = 0; + struct list_head *i; + + WRITE_LOCK(&ip_nat_lock); + for (i = protos.next; i != &protos; i = i->next) { + if (((struct ip_nat_protocol *)i)->protonum + == proto->protonum) { + ret = -EBUSY; + goto out; + } + } + + list_prepend(&protos, proto); + MOD_INC_USE_COUNT; + + out: + WRITE_UNLOCK(&ip_nat_lock); + return ret; +} + +/* Noone stores the protocol anywhere; simply delete it. */ +void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) +{ + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&protos, proto); + WRITE_UNLOCK(&ip_nat_lock); + + /* Someone could be still looking at the proto in a bh. */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + MOD_DEC_USE_COUNT; +} + +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = ip_nat_rule_init(); + if (ret < 0) { + printk("ip_nat_init: can't setup rules.\n"); + goto cleanup_nothing; + } + ret = ip_nat_init(); + if (ret < 0) { + printk("ip_nat_init: can't setup rules.\n"); + goto cleanup_rule_init; + } + ret = nf_register_hook(&ip_nat_in_ops); + if (ret < 0) { + printk("ip_nat_init: can't register in hook.\n"); + goto cleanup_nat; + } + ret = nf_register_hook(&ip_nat_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register out hook.\n"); + goto cleanup_inops; + } + ret = nf_register_hook(&ip_nat_local_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register local out hook.\n"); + goto cleanup_outops; + } + __MOD_INC_USE_COUNT(ip_conntrack_module); + return ret; + + cleanup: + __MOD_DEC_USE_COUNT(ip_conntrack_module); + nf_unregister_hook(&ip_nat_local_out_ops); + cleanup_outops: + nf_unregister_hook(&ip_nat_out_ops); + cleanup_inops: + nf_unregister_hook(&ip_nat_in_ops); + cleanup_nat: + ip_nat_cleanup(); + cleanup_rule_init: + ip_nat_rule_cleanup(); + cleanup_nothing: + MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock); + return ret; +} + +static int __init 
init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_nat_setup_info); +EXPORT_SYMBOL(ip_nat_helper_register); +EXPORT_SYMBOL(ip_nat_helper_unregister); +EXPORT_SYMBOL(ip_nat_expect_register); +EXPORT_SYMBOL(ip_nat_expect_unregister); +EXPORT_SYMBOL(ip_nat_cheat_check); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c new file mode 100644 index 000000000000..d5ca01aa6f30 --- /dev/null +++ b/net/ipv4/netfilter/ip_queue.c @@ -0,0 +1,752 @@ +/* + * This is a module which is used for queueing IPv4 packets and + * communicating with userspace via netlink. + * + * (C) 2000 James Morris + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +EXPORT_NO_SYMBOLS; + +#define IPQ_THR_NAME "kipq" +#define IPQ_NAME "ip_queue" +#define IPQ_QMAX_DEFAULT 1024 + +#define IPQ_PROC_FS_NAME "ip_queue" + +#define NET_IPQ_QMAX 2088 +#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" + +typedef struct ipq_queue_element { + struct list_head list; /* Links element into queue */ + unsigned char state; /* State of this element */ + int verdict; /* Current verdict */ + struct nf_info *info; /* Extra info from netfilter */ + struct sk_buff *skb; /* Packet inside */ +} ipq_queue_element_t; + +typedef int (*ipq_send_cb_t)(ipq_queue_element_t *e); + +typedef struct ipq_peer { + pid_t pid; /* PID of userland peer */ + unsigned char died; /* We think the peer died */ + unsigned char copy_mode; /* Copy packet as well as metadata? 
*/ + size_t copy_range; /* Range past metadata to copy */ + ipq_send_cb_t send; /* Callback for sending data to peer */ +} ipq_peer_t; + +typedef struct ipq_thread { + pid_t pid; /* PID of kernel thread */ + unsigned char terminate; /* Termination flag */ + unsigned char running; /* Running flag */ + wait_queue_head_t wq; /* I/O wait queue */ + void (*process)(void *data); /* Queue processing function */ +} ipq_thread_t; + +typedef struct ipq_queue { + int len; /* Current queue len */ + int *maxlen; /* Maximum queue len, via sysctl */ + unsigned char state; /* Current queue state */ + struct list_head list; /* Head of packet queue */ + spinlock_t lock; /* Queue spinlock */ + ipq_peer_t peer; /* Userland peer */ + ipq_thread_t thread; /* Thread context */ +} ipq_queue_t; + + +/**************************************************************************** +* +* Kernel thread +* +****************************************************************************/ + +static void ipq_thread_init(char *thread_name) +{ + lock_kernel(); + exit_files(current); + daemonize(); + strcpy(current->comm, thread_name); + unlock_kernel(); + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + sigfillset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); +} + +static int ipq_thread_start(void *data) +{ + ipq_queue_t *q = (ipq_queue_t *)data; + + q->thread.running = 1; + ipq_thread_init(IPQ_THR_NAME); + q->thread.pid = current->pid; + while (!q->thread.terminate) { + interruptible_sleep_on(&q->thread.wq); + q->thread.process(q); + } + q->thread.running = 0; + return 0; +} + +static void ipq_thread_stop(ipq_queue_t *q) +{ + if (!(q->thread.pid || q->thread.running)) + return; + q->state = IPQ_QS_FLUSH; + q->thread.terminate = 1; + wake_up_interruptible(&q->thread.wq); + current->state = TASK_INTERRUPTIBLE; + while (q->thread.running) { + schedule_timeout(HZ/10); + current->state = TASK_RUNNING; + } +} + +static int ipq_thread_create(ipq_queue_t *q) +{ + 
int status = kernel_thread(ipq_thread_start, q, 0); + return (status < 0) ? status : 0; +} + + +/**************************************************************************** + * + * Packet queue + * + ****************************************************************************/ + +/* Must be called under spinlock */ +static __inline__ void +ipq_dequeue(ipq_queue_t *q, + ipq_queue_element_t *e) +{ + list_del(&e->list); + nf_reinject(e->skb, e->info, e->verdict); + kfree(e); + q->len--; +} + +/* Must be called under spinlock */ +static __inline__ void +ipq_queue_drop(ipq_queue_t *q, + ipq_queue_element_t *e) +{ + e->verdict = NF_DROP; + ipq_dequeue(q, e); +} + +static int +ipq_notify_peer(ipq_queue_t *q, + ipq_queue_element_t *e) +{ + int status = q->peer.send(e); + + if (status >= 0) { + e->state = IPQ_PS_WAITING; + return status; + } + if (status == -ERESTARTSYS || status == -EAGAIN) + return 0; + printk(KERN_INFO "%s: error notifying peer %d, resetting " + "state and flushing queue\n", IPQ_NAME, q->peer.pid); + q->state = IPQ_QS_FLUSH; + q->peer.died = 1; + q->peer.pid = 0; + q->peer.copy_mode = IPQ_COPY_META; + q->peer.copy_range = 0; + return status; +} + +static void +ipq_queue_process(void *data) +{ + struct list_head *i; + ipq_queue_t *q = (ipq_queue_t *)data; + +restart: + if (q->state == IPQ_QS_HOLD) + return; + spin_lock_bh(&q->lock); + for (i = q->list.prev; i != &q->list; i = i->prev) { + ipq_queue_element_t *e = (ipq_queue_element_t *)i; + + if (q->state == IPQ_QS_FLUSH) { + QDEBUG("flushing packet %p\n", e); + ipq_queue_drop(q, e); + continue; + } + switch (e->state) { + case IPQ_PS_NEW: { + int status = ipq_notify_peer(q, e); + if (status < 0) { + spin_unlock_bh(&q->lock); + goto restart; + } + break; + } + case IPQ_PS_VERDICT: + ipq_dequeue(q, e); + break; + case IPQ_PS_WAITING: + break; + default: + printk(KERN_INFO "%s: dropping stuck packet %p " + "with ps=%d qs=%d\n", IPQ_NAME, + e, e->state, q->state); + ipq_queue_drop(q, e); + } + } + 
spin_unlock_bh(&q->lock); + if (q->state == IPQ_QS_FLUSH) + q->state = IPQ_QS_HOLD; +} + +static ipq_queue_t * +ipq_queue_create(nf_queue_outfn_t outfn, + ipq_send_cb_t send_cb, + int *errp, + int *sysctl_qmax) +{ + int status; + ipq_queue_t *q; + + *errp = 0; + q = kmalloc(sizeof(ipq_queue_t), GFP_KERNEL); + if (q == NULL) { + *errp = -ENOMEM; + return NULL; + } + q->thread.terminate = 0; + q->thread.running = 0; + q->thread.process = ipq_queue_process; + init_waitqueue_head(&q->thread.wq); + q->peer.pid = 0; + q->peer.died = 0; + q->peer.copy_mode = IPQ_COPY_META; + q->peer.copy_range = 0; + q->peer.send = send_cb; + q->len = 0; + q->maxlen = sysctl_qmax; + q->state = IPQ_QS_HOLD; + INIT_LIST_HEAD(&q->list); + spin_lock_init(&q->lock); + status = nf_register_queue_handler(PF_INET, outfn, q); + if (status < 0) { + *errp = -EBUSY; + kfree(q); + return NULL; + } + status = ipq_thread_create(q); + if (status < 0) { + nf_unregister_queue_handler(PF_INET); + *errp = status; + kfree(q); + return NULL; + } + return q; +} + +static int +ipq_enqueue(ipq_queue_t *q, + struct sk_buff *skb, + struct nf_info *info) +{ + ipq_queue_element_t *e = NULL; + + e = kmalloc(sizeof(*e), GFP_ATOMIC); + if (e == NULL) { + printk(KERN_ERR "%s: out of memory in %s\n", + IPQ_NAME, __FUNCTION__); + return -ENOMEM; + } + e->state = IPQ_PS_NEW; + e->verdict = NF_DROP; + e->info = info; + e->skb = skb; + spin_lock_bh(&q->lock); + if (q->len >= *q->maxlen) { + spin_unlock_bh(&q->lock); + printk(KERN_WARNING "%s: queue full at %d entries, " + "dropping packet.\n", IPQ_NAME, q->len); + kfree(e); + nf_reinject(skb, info, NF_DROP); + return 0; + } + list_add(&e->list, &q->list); + q->len++; + spin_unlock_bh(&q->lock); + wake_up_interruptible(&q->thread.wq); + return 0; +} + +/* FIXME: need to find a way to notify user during module unload */ +static void +ipq_queue_destroy(ipq_queue_t *q) +{ + ipq_thread_stop(q); + nf_unregister_queue_handler(PF_INET); + kfree(q); +} + +static int 
+ipq_queue_mangle_ipv4(unsigned char *buf, + ipq_verdict_msg_t *v, + ipq_queue_element_t *e) +{ + struct iphdr *user_iph = (struct iphdr *)buf; + + if (v->data_len < sizeof(*user_iph)) + return 0; + + if (e->skb->nh.iph->check != user_iph->check) { + int diff = v->data_len - e->skb->len; + + if (diff < 0) + skb_trim(e->skb, v->data_len); + else if (diff > 0) { + if (v->data_len > 0xFFFF) { + e->verdict = NF_DROP; + return -EINVAL; + } + if (diff > skb_tailroom(e->skb)) { + struct sk_buff *newskb; + + /* Ack, we waste a memcpy() of data here */ + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { + printk(KERN_WARNING "%s: OOM in %s, " + "dropping packet\n", + IPQ_THR_NAME, __FUNCTION__); + e->verdict = NF_DROP; + return -ENOMEM; + } + kfree_skb(e->skb); + e->skb = newskb; + } + skb_put(e->skb, diff); + } + memcpy(e->skb->data, buf, v->data_len); + e->skb->nfcache |= NFC_ALTERED; + } + return 0; +} + +static int +ipq_queue_set_verdict(ipq_queue_t *q, + ipq_verdict_msg_t *v, + unsigned char *buf, + unsigned int len) +{ + struct list_head *i; + + if (v->value < 0 || v->value > NF_MAX_VERDICT) + return -EINVAL; + spin_lock_bh(&q->lock); + for (i = q->list.next; i != &q->list; i = i->next) { + ipq_queue_element_t *e = (ipq_queue_element_t *)i; + + if (v->id == (unsigned long )e) { + int status = 0; + e->state = IPQ_PS_VERDICT; + e->verdict = v->value; + + if (buf && v->data_len == len) + status = ipq_queue_mangle_ipv4(buf, v, e); + spin_unlock_bh(&q->lock); + return status; + } + } + spin_unlock_bh(&q->lock); + return -ENOENT; +} + +static int +ipq_receive_peer(ipq_queue_t *q, + ipq_peer_msg_t *m, + unsigned char type, + unsigned int len) +{ + if (q->state == IPQ_QS_FLUSH) + return -EBUSY; + + if (len < sizeof(ipq_peer_msg_t)) + return -EINVAL; + + switch (type) { + case IPQM_MODE: + switch (m->msg.mode.value) { + case IPQ_COPY_NONE: + q->peer.copy_mode = IPQ_COPY_NONE; + q->peer.copy_range = 0; + q->state = 
IPQ_QS_FLUSH; + break; + case IPQ_COPY_META: + if (q->state == IPQ_QS_FLUSH) + return -EAGAIN; + q->peer.copy_mode = IPQ_COPY_META; + q->peer.copy_range = 0; + q->state = IPQ_QS_COPY; + break; + case IPQ_COPY_PACKET: + if (q->state == IPQ_QS_FLUSH) + return -EAGAIN; + q->peer.copy_mode = IPQ_COPY_PACKET; + q->peer.copy_range = m->msg.mode.range; + q->state = IPQ_QS_COPY; + break; + default: + return -EINVAL; + } + break; + case IPQM_VERDICT: { + int status; + unsigned char *data = NULL; + + if (m->msg.verdict.value > NF_MAX_VERDICT) + return -EINVAL; + if (m->msg.verdict.data_len) + data = (unsigned char *)m + sizeof(*m); + status = ipq_queue_set_verdict(q, &m->msg.verdict, + data, len - sizeof(*m)); + if (status < 0) + return status; + break; + } + default: + return -EINVAL; + } + wake_up_interruptible(&q->thread.wq); + return 0; +} + + +/**************************************************************************** + * + * Netfilter interface + * + ****************************************************************************/ + +/* + * Packets arrive here from netfilter for queuing to userspace. + * All of them must be fed back via nf_reinject() or Alexey will kill Rusty. + */ +static int +receive_netfilter(struct sk_buff *skb, + struct nf_info *info, + void *data) +{ + ipq_queue_t *q = (ipq_queue_t *)data; + + if (q->state == IPQ_QS_FLUSH) + return -EBUSY; + return ipq_enqueue(q, skb, info); +} + +/**************************************************************************** + * + * Netlink interface. 
+ * + ****************************************************************************/ + +static struct sk_buff * +netlink_build_message(ipq_queue_element_t *e, + int *errp); + +extern __inline__ void +receive_user_skb(struct sk_buff *skb); + +static int +netlink_send_peer(ipq_queue_element_t *e); + +static struct sock *nfnl = NULL; +ipq_queue_t *nlq = NULL; + +static int +netlink_send_peer(ipq_queue_element_t *e) +{ + int status = 0; + struct sk_buff *skb; + + if (!nlq->peer.pid) + return -EINVAL; + skb = netlink_build_message(e, &status); + if (skb == NULL) + return status; + return netlink_unicast(nfnl, skb, nlq->peer.pid, 0); +} + +static struct sk_buff * +netlink_build_message(ipq_queue_element_t *e, + int *errp) +{ + unsigned char *old_tail; + size_t size = 0; + size_t data_len = 0; + struct sk_buff *skb; + ipq_packet_msg_t *pm; + struct nlmsghdr *nlh; + + switch (nlq->peer.copy_mode) { + size_t copy_range; + + case IPQ_COPY_META: + size = NLMSG_SPACE(sizeof(*pm)); + data_len = 0; + break; + case IPQ_COPY_PACKET: + copy_range = nlq->peer.copy_range; + if (copy_range == 0 || copy_range > e->skb->len) + data_len = e->skb->len; + else + data_len = copy_range; + size = NLMSG_SPACE(sizeof(*pm) + data_len); + break; + case IPQ_COPY_NONE: + default: + *errp = -EINVAL; + return NULL; + } + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + old_tail = skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); + pm = NLMSG_DATA(nlh); + memset(pm, 0, sizeof(*pm)); + pm->packet_id = (unsigned long )e; + pm->data_len = data_len; + pm->timestamp_sec = e->skb->stamp.tv_sec; + pm->timestamp_usec = e->skb->stamp.tv_usec; + pm->hook = e->info->hook; + if (e->info->indev) strcpy(pm->indev_name, e->info->indev->name); + else pm->indev_name[0] = '\0'; + if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name); + else pm->outdev_name[0] = '\0'; + if (data_len) + memcpy(++pm, e->skb->data, data_len); + nlh->nlmsg_len = skb->tail - 
old_tail; + NETLINK_CB(skb).dst_groups = 0; + return skb; +nlmsg_failure: + if (skb) + kfree(skb); + *errp = 0; + printk(KERN_ERR "%s: error creating netlink message\n", IPQ_NAME); + return NULL; +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0); +/* + * FIXME: ping old peer if we detect a new peer then resend. + */ +extern __inline__ void +receive_user_skb(struct sk_buff *skb) +{ + int status, type; + struct nlmsghdr *nlh; + + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(*nlh) + || skb->len < nlh->nlmsg_len + || nlh->nlmsg_pid <= 0 + || !(nlh->nlmsg_flags & NLM_F_REQUEST) + || nlh->nlmsg_flags & NLM_F_MULTI) + RCV_SKB_FAIL(-EINVAL); + if (nlh->nlmsg_flags & MSG_TRUNC) + RCV_SKB_FAIL(-ECOMM); + type = nlh->nlmsg_type; + if (type < NLMSG_NOOP || type >= IPQM_MAX) + RCV_SKB_FAIL(-EINVAL); + if (type <= IPQM_BASE) + return; + if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) + RCV_SKB_FAIL(-EPERM); + if (nlq->peer.pid && !nlq->peer.died + && (nlq->peer.pid != nlh->nlmsg_pid)) + printk(KERN_WARNING "%s: peer pid changed from %d to %d\n", + IPQ_NAME, nlq->peer.pid, nlh->nlmsg_pid); + nlq->peer.pid = nlh->nlmsg_pid; + nlq->peer.died = 0; + status = ipq_receive_peer(nlq, NLMSG_DATA(nlh), + type, skb->len - NLMSG_LENGTH(0)); + if (status < 0) + RCV_SKB_FAIL(status); + if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + return; +} + +/* Note: we are only dealing with single part messages at the moment. 
*/ +static void +receive_user_sk(struct sock *sk, + int len) +{ + do { + struct sk_buff *skb; + + if (rtnl_shlock_nowait()) + return; + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + receive_user_skb(skb); + kfree_skb(skb); + } + up(&rtnl_sem); + } while (nfnl && nfnl->receive_queue.qlen); +} + + +/**************************************************************************** + * + * System events + * + ****************************************************************************/ + +static int +receive_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + if (event == NETDEV_UNREGISTER) + if (nlq) + ipq_thread_stop(nlq); + return NOTIFY_DONE; +} + +struct notifier_block ipq_dev_notifier = { + receive_event, + NULL, + 0 +}; + + +/**************************************************************************** + * + * Sysctl - queue tuning. + * + ****************************************************************************/ + +static int sysctl_maxlen = IPQ_QMAX_DEFAULT; + +static struct ctl_table_header *ipq_sysctl_header; + +static ctl_table ipq_table[] = { + { NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen, + sizeof(sysctl_maxlen), 0644, NULL, proc_dointvec }, + { 0 } +}; + +static ctl_table ipq_dir_table[] = { + {NET_IPV4, "ipv4", NULL, 0, 0555, ipq_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +static ctl_table ipq_root_table[] = { + {CTL_NET, "net", NULL, 0, 0555, ipq_dir_table, 0, 0, 0, 0, 0}, + { 0 } +}; + +/**************************************************************************** + * + * Procfs - debugging info. 
+ * + ****************************************************************************/ + +static int +ipq_get_info(char *buffer, char **start, off_t offset, int length) +{ + int len; + + spin_lock_bh(&nlq->lock); + len = sprintf(buffer, + "Thread pid : %d\n" + "Thread terminate : %d\n" + "Thread running : %d\n" + "Peer pid : %d\n" + "Peer died : %d\n" + "Peer copy mode : %d\n" + "Peer copy range : %d\n" + "Queue length : %d\n" + "Queue max. length : %d\n" + "Queue state : %d\n", + nlq->thread.pid, + nlq->thread.terminate, + nlq->thread.running, + nlq->peer.pid, + nlq->peer.died, + nlq->peer.copy_mode, + nlq->peer.copy_range, + nlq->len, + *nlq->maxlen, + nlq->state); + spin_unlock_bh(&nlq->lock); + *start = buffer + offset; + len -= offset; + if (len > length) + len = length; + else if (len < 0) + len = 0; + return len; +} + +/**************************************************************************** + * + * Module stuff. + * + ****************************************************************************/ + +static int __init init(void) +{ + int status = 0; + + nfnl = netlink_kernel_create(NETLINK_FIREWALL, receive_user_sk); + if (nfnl == NULL) { + printk(KERN_ERR "%s: initialisation failed: unable to " + "create kernel netlink socket\n", IPQ_NAME); + return -ENOMEM; + } + nlq = ipq_queue_create(receive_netfilter, + netlink_send_peer, &status, &sysctl_maxlen); + if (nlq == NULL) { + printk(KERN_ERR "%s: initialisation failed: unable to " + "initialise queue\n", IPQ_NAME); + sock_release(nfnl->socket); + return status; + } + register_netdevice_notifier(&ipq_dev_notifier); + proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); + return status; +} + +static void __exit fini(void) +{ + unregister_sysctl_table(ipq_sysctl_header); + proc_net_remove(IPQ_PROC_FS_NAME); + unregister_netdevice_notifier(&ipq_dev_notifier); + ipq_queue_destroy(nlq); + sock_release(nfnl->socket); +} + +MODULE_DESCRIPTION("IPv4 packet 
queue handler"); +module_init(init); +module_exit(fini); + diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c new file mode 100644 index 000000000000..c999e0423390 --- /dev/null +++ b/net/ipv4/netfilter/ip_tables.c @@ -0,0 +1,1664 @@ +/* + * Packet matching code. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include <linux/netfilter_ipv4/ip_tables.h> + +#ifndef IP_OFFSET +#define IP_OFFSET 0x1FFF +#endif + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf(format, args...) printk(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +#ifdef CONFIG_NETFILTER_DEBUG +#define IP_NF_ASSERT(x) \ +do { \ + if (!(x)) \ + printk("IPT_ASSERT: %s:%s:%u\n", \ + __FUNCTION__, __FILE__, __LINE__); \ +} while(0) +#else +#define IP_NF_ASSERT(x) +#endif +#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) + +/* Mutex protects lists (only traversed in user context). */ +static DECLARE_MUTEX(ipt_mutex); + +/* Must have mutex */ +#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) +#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) +#include <linux/netfilter_ipv4/lockhelp.h> +#include <linux/netfilter_ipv4/listhelp.h> + +#if 0 +/* All the better to debug you with... */ +#define static +#define inline +#endif + +/* Locking is simple: we assume at worst case there will be one packet + in user context and one from bottom halves (or soft irq if Alexey's + softnet patch was applied). + + We keep a set of rules for each CPU, so we can avoid write-locking + them; doing a readlock_bh() stops packets coming through if we're + in user context.
+ + To be cache friendly on SMP, we arrange them like so: + [ n-entries ] + ... cache-align padding ... + [ n-entries ] + + Hence the start of any table is given by get_table() below. */ + +/* The table itself */ +struct ipt_table_info +{ + /* Size per table */ + unsigned int size; + /* Number of entries: FIXME. --RR */ + unsigned int number; + + /* Entry points and underflows */ + unsigned int hook_entry[NF_IP_NUMHOOKS]; + unsigned int underflow[NF_IP_NUMHOOKS]; + + char padding[SMP_ALIGN((NF_IP_NUMHOOKS*2+2)*sizeof(unsigned int))]; + + /* ipt_entry tables: one per CPU */ + char entries[0]; +}; + +static LIST_HEAD(ipt_target); +static LIST_HEAD(ipt_match); +static LIST_HEAD(ipt_tables); +#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) + +#ifdef CONFIG_SMP +#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*cpu_number_map(p)) +#else +#define TABLE_OFFSET(t,p) 0 +#endif + +#if 0 +#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) +#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) +#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) +#endif + +/* Returns whether matches rule or not. */ +static inline int +ip_packet_match(const struct iphdr *ip, + const char *indev, + const char *outdev, + const struct ipt_ip *ipinfo, + int isfrag) +{ + size_t i; + unsigned long ret; + +#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg)) + + if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, + IPT_INV_SRCIP) + || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + IPT_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", + NIPQUAD(ip->saddr), + NIPQUAD(ipinfo->smsk.s_addr), + NIPQUAD(ipinfo->src.s_addr), + ipinfo->invflags & IPT_INV_SRCIP ? 
" (INV)" : ""); + dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", + NIPQUAD(ip->daddr), + NIPQUAD(ipinfo->dmsk.s_addr), + NIPQUAD(ipinfo->dst.s_addr), + ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); + return 0; + } + + /* Look for ifname matches; this should unroll nicely. */ + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)indev)[i] + ^ ((const unsigned long *)ipinfo->iniface)[i]) + & ((const unsigned long *)ipinfo->iniface_mask)[i]; + } + + if (FWINV(ret != 0, IPT_INV_VIA_IN)) { + dprintf("VIA in mismatch (%s vs %s).%s\n", + indev, ipinfo->iniface, + ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); + return 0; + } + + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)outdev)[i] + ^ ((const unsigned long *)ipinfo->outiface)[i]) + & ((const unsigned long *)ipinfo->outiface_mask)[i]; + } + + if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { + dprintf("VIA out mismatch (%s vs %s).%s\n", + outdev, ipinfo->outiface, + ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); + return 0; + } + + /* Check specific protocol */ + if (ipinfo->proto + && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, ipinfo->proto, + ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); + return 0; + } + + /* If we have a fragment rule but the packet is not a fragment + * then we return zero */ + if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + ipinfo->invflags & IPT_INV_FRAG ? 
" (INV)" : ""); + return 0; + } + + return 1; +} + +static inline int +ip_checkentry(const struct ipt_ip *ip) +{ + if (ip->flags & ~IPT_F_MASK) { + duprintf("Unknown flag bits set: %08X\n", + ip->flags & ~IPT_F_MASK); + return 0; + } + if (ip->invflags & ~IPT_INV_MASK) { + duprintf("Unknown invflag bits set: %08X\n", + ip->invflags & ~IPT_INV_MASK); + return 0; + } + return 1; +} + +static unsigned int +ipt_error(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + if (net_ratelimit()) + printk("ip_tables: error: `%s'\n", (char *)targinfo); + + return NF_DROP; +} + +static inline +int do_match(struct ipt_entry_match *m, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + /* Stop iteration if it doesn't match */ + if (!m->u.match->match(skb, in, out, m->data, + offset, hdr, datalen, hotdrop)) + return 1; + else + return 0; +} + +static inline struct ipt_entry * +get_entry(void *base, unsigned int offset) +{ + return (struct ipt_entry *)(base + offset); +} + +/* Returns one of the generic firewall policies, like NF_ACCEPT. */ +unsigned int +ipt_do_table(struct sk_buff **pskb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + struct ipt_table *table, + void *userdata) +{ + static const char nulldevname[IFNAMSIZ] = { 0 }; + u_int16_t offset; + struct iphdr *ip; + void *protohdr; + u_int16_t datalen; + int hotdrop = 0; + /* Initializing verdict to NF_DROP keeps gcc happy. */ + unsigned int verdict = NF_DROP; + const char *indev, *outdev; + void *table_base; + struct ipt_entry *e, *back; + + /* Initialization */ + ip = (*pskb)->nh.iph; + protohdr = (u_int32_t *)ip + ip->ihl; + datalen = (*pskb)->len - ip->ihl * 4; + indev = in ? in->name : nulldevname; + outdev = out ? 
out->name : nulldevname; + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + offset = ntohs(ip->frag_off) & IP_OFFSET; + + read_lock_bh(&table->lock); + IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + table_base = (void *)table->private->entries + + TABLE_OFFSET(table->private, smp_processor_id()); + e = get_entry(table_base, table->private->hook_entry[hook]); + + /* Check noone else using our table */ + IP_NF_ASSERT(((struct ipt_entry *)table_base)->comefrom == 0xdead57ac); +#ifdef CONFIG_NETFILTER_DEBUG + ((struct ipt_entry *)table_base)->comefrom = 0x57acc001; +#endif + + /* For return from builtin chain */ + back = get_entry(table_base, table->private->underflow[hook]); + + do { + IP_NF_ASSERT(e); + IP_NF_ASSERT(back); + (*pskb)->nfcache |= e->nfcache; + if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + struct ipt_entry_target *t; + + if (IPT_MATCH_ITERATE(e, do_match, + *pskb, in, out, + offset, protohdr, + datalen, &hotdrop) != 0) + goto no_match; + + ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + + t = ipt_get_target(e); + IP_NF_ASSERT(t->u.target); + /* Standard target? */ + if (!t->u.target->target) { + int v; + + v = ((struct ipt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? 
*/ + if (v != IPT_RETURN) { + verdict = (unsigned)(-v) - 1; + break; + } + e = back; + back = get_entry(table_base, + back->comefrom); + continue; + } + if (table_base + v + != (void *)e + e->next_offset) { + /* Save old back ptr in next entry */ + struct ipt_entry *next + = (void *)e + e->next_offset; + next->comefrom + = (void *)back - table_base; + /* set back pointer to next entry */ + back = next; + } + + e = get_entry(table_base, v); + } else { + verdict = t->u.target->target(pskb, hook, + in, out, + t->data, + userdata); + + /* Target might have changed stuff. */ + ip = (*pskb)->nh.iph; + protohdr = (u_int32_t *)ip + ip->ihl; + datalen = (*pskb)->len - ip->ihl * 4; + + if (verdict == IPT_CONTINUE) + e = (void *)e + e->next_offset; + else + /* Verdict */ + break; + } + } else { + + no_match: + e = (void *)e + e->next_offset; + } + } while (!hotdrop); + +#ifdef CONFIG_NETFILTER_DEBUG + ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac; +#endif + read_unlock_bh(&table->lock); + +#ifdef DEBUG_ALLOW_ALL + return NF_ACCEPT; +#else + if (hotdrop) + return NF_DROP; + else return verdict; +#endif +} + +/* If it succeeds, returns element and locks mutex */ +static inline void * +find_inlist_lock_noload(struct list_head *head, + const char *name, + int *error, + struct semaphore *mutex) +{ + void *ret; + +#if 0 + duprintf("find_inlist: searching for `%s' in %s.\n", + name, head == &ipt_target ? "ipt_target" + : head == &ipt_match ? "ipt_match" + : head == &ipt_tables ? 
"ipt_tables" : "UNKNOWN"); +#endif + + *error = down_interruptible(mutex); + if (*error != 0) + return NULL; + + ret = list_named_find(head, name); + if (!ret) { + *error = -ENOENT; + up(mutex); + } + return ret; +} + +#ifndef CONFIG_KMOD +#define find_inlist_lock(h,n,e,p,m) find_inlist_lock_noload((h),(n),(e),(m)) +#else +static void * +find_inlist_lock(struct list_head *head, + const char *name, + const char *prefix, + int *error, + struct semaphore *mutex) +{ + void *ret; + + ret = find_inlist_lock_noload(head, name, error, mutex); + if (!ret) { + char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1]; + strcpy(modulename, prefix); + strcat(modulename, name); + duprintf("find_inlist: loading `%s'.\n", modulename); + request_module(modulename); + ret = find_inlist_lock_noload(head, name, error, mutex); + } + + return ret; +} +#endif + +static inline struct ipt_table * +find_table_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex); +} + +static inline struct ipt_match * +find_match_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex); +} + +static inline struct ipt_target * +find_target_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex); +} + +/* All zeroes == unconditional rule. */ +static inline int +unconditional(const struct ipt_ip *ip) +{ + unsigned int i; + + for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) + if (((__u32 *)ip)[i]) + return 0; + + return 1; +} + +/* Figures out from what hook each rule can be called: returns 0 if + there are loops. Puts hook bitmask in comefrom. 
 */ +static int +mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) +{ + unsigned int hook; + + /* No recursion; use packet counter to save back ptrs (reset + to 0 as we leave), and comefrom to save source hook bitmask */ + for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { + unsigned int pos = newinfo->hook_entry[hook]; + struct ipt_entry *e + = (struct ipt_entry *)(newinfo->entries + pos); + + if (!(valid_hooks & (1 << hook))) + continue; + + /* Set initial back pointer. */ + e->counters.pcnt = pos; + + for (;;) { + struct ipt_standard_target *t + = (void *)ipt_get_target(e); + + if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { + printk("iptables: loop hook %u pos %u %08X.\n", + hook, pos, e->comefrom); + return 0; + } + e->comefrom + |= ((1 << hook) | (1 << NF_IP_NUMHOOKS)); + + /* Unconditional return/END. */ + if (e->target_offset == sizeof(struct ipt_entry) + && (strcmp(t->target.u.name, IPT_STANDARD_TARGET) + == 0) + && t->verdict < 0 + && unconditional(&e->ip)) { + unsigned int oldpos, size; + + /* Return: backtrack through the last + big jump. */ + do { + e->comefrom ^= (1<<NF_IP_NUMHOOKS); +#ifdef DEBUG_IP_FIREWALL_USER + if (e->comefrom + & (1 << NF_IP_NUMHOOKS)) { + duprintf("Back unset " + "on hook %u " + "rule %u\n", + hook, pos); + } +#endif + oldpos = pos; + pos = e->counters.pcnt; + e->counters.pcnt = 0; + + /* We're at the start. */ + if (pos == oldpos) + goto next; + + e = (struct ipt_entry *) + (newinfo->entries + pos); + } while (oldpos == pos + e->next_offset); + + /* Move along one */ + size = e->next_offset; + e = (struct ipt_entry *) + (newinfo->entries + pos + size); + e->counters.pcnt = pos; + pos += size; + } else { + int newpos = t->verdict; + + if (strcmp(t->target.u.name, + IPT_STANDARD_TARGET) == 0 + && newpos >= 0) { + /* This a jump; chase it. */ + duprintf("Jump rule %u -> %u\n", + pos, newpos); + } else { + /* ...
this is a fallthru */ + newpos = pos + e->next_offset; + } + e = (struct ipt_entry *) + (newinfo->entries + newpos); + e->counters.pcnt = pos; + pos = newpos; + } + } + next: + duprintf("Finished chain %u\n", hook); + } + return 1; +} + +static inline int +cleanup_match(struct ipt_entry_match *m, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + + if (m->u.match->me) + __MOD_DEC_USE_COUNT(m->u.match->me); + + return 0; +} + +static inline int +standard_check(const struct ipt_entry_target *t, + unsigned int max_offset) +{ + struct ipt_standard_target *targ = (void *)t; + + /* Check standard info. */ + if (t->target_size != sizeof(struct ipt_standard_target)) { + duprintf("standard_check: target size %u != %u\n", + t->target_size, sizeof(struct ipt_standard_target)); + return 0; + } + + if (targ->verdict >= 0 + && targ->verdict > max_offset - sizeof(struct ipt_entry)) { + duprintf("ipt_standard_check: bad verdict (%i)\n", + targ->verdict); + return 0; + } + + if (targ->verdict < -NF_MAX_VERDICT - 1) { + duprintf("ipt_standard_check: bad negative verdict (%i)\n", + targ->verdict); + return 0; + } + return 1; +} + +static inline int +check_match(struct ipt_entry_match *m, + const char *name, + const struct ipt_ip *ip, + unsigned int hookmask, + unsigned int *i) +{ + int ret; + struct ipt_match *match; + + match = find_match_lock(m->u.name, &ret, &ipt_mutex); + if (!match) { + duprintf("check_match: `%s' not found\n", m->u.name); + return ret; + } + if (match->me) + __MOD_INC_USE_COUNT(match->me); + m->u.match = match; + up(&ipt_mutex); + + if (m->u.match->checkentry + && !m->u.match->checkentry(name, ip, m->data, + m->match_size - sizeof(*m), + hookmask)) { + if (m->u.match->me) + __MOD_DEC_USE_COUNT(m->u.match->me); + duprintf("ip_tables: check failed for `%s'.\n", + m->u.match->name); + return -EINVAL; + } + + (*i)++; + return 0; +} + +static struct ipt_target ipt_standard_target; + +static inline int +check_entry(struct ipt_entry *e, const char *name, 
unsigned int size, + unsigned int *i) +{ + struct ipt_entry_target *t; + struct ipt_target *target; + int ret; + unsigned int j; + + if (!ip_checkentry(&e->ip)) { + duprintf("ip_tables: ip check failed %p %s.\n", e, name); + return -EINVAL; + } + + j = 0; + ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); + if (ret != 0) + goto cleanup_matches; + + t = ipt_get_target(e); + target = find_target_lock(t->u.name, &ret, &ipt_mutex); + if (!target) { + duprintf("check_entry: `%s' not found\n", t->u.name); + up(&ipt_mutex); + return ret; + } + if (target->me) + __MOD_INC_USE_COUNT(target->me); + t->u.target = target; + up(&ipt_mutex); + + if (t->u.target == &ipt_standard_target) { + if (!standard_check(t, size)) { + ret = -EINVAL; + goto cleanup_matches; + } + } else if (t->u.target->checkentry + && !t->u.target->checkentry(name, e, t->data, + t->target_size - sizeof(*t), + e->comefrom)) { + if (t->u.target->me) + __MOD_DEC_USE_COUNT(t->u.target->me); + duprintf("ip_tables: check failed for `%s'.\n", + t->u.target->name); + ret = -EINVAL; + goto cleanup_matches; + } + + (*i)++; + return 0; + + cleanup_matches: + IPT_MATCH_ITERATE(e, cleanup_match, &j); + return ret; +} + +static inline int +check_entry_size_and_hooks(struct ipt_entry *e, + struct ipt_table_info *newinfo, + unsigned char *base, + unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + unsigned int *i) +{ + unsigned int h; + + if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 + || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + duprintf("Bad offset %p\n", e); + return -EINVAL; + } + + if (e->next_offset + < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* Check hooks & underflows */ + for (h = 0; h < NF_IP_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + 
if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* FIXME: underflows must be unconditional, standard verdicts + < 0 (not IPT_RETURN). --RR */ + + /* Clear counters and comefrom */ + e->counters = ((struct ipt_counters) { 0, 0 }); + e->comefrom = 0; + + (*i)++; + return 0; +} + +static inline int +cleanup_entry(struct ipt_entry *e, unsigned int *i) +{ + struct ipt_entry_target *t; + + if (i && (*i)-- == 0) + return 1; + + /* Cleanup all matches */ + IPT_MATCH_ITERATE(e, cleanup_match, NULL); + t = ipt_get_target(e); + if (t->u.target->me) + __MOD_DEC_USE_COUNT(t->u.target->me); + + return 0; +} + +/* Checks and translates the user-supplied table segment (held in + newinfo) */ +static int +translate_table(const char *name, + unsigned int valid_hooks, + struct ipt_table_info *newinfo, + unsigned int size, + unsigned int number, + const unsigned int *hook_entries, + const unsigned int *underflows) +{ + unsigned int i; + int ret; + + newinfo->size = size; + newinfo->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_IP_NUMHOOKS; i++) { + newinfo->hook_entry[i] = 0xFFFFFFFF; + newinfo->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_table: size %u\n", newinfo->size); + i = 0; + /* Walk through entries, checking offsets. 
*/ + ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + check_entry_size_and_hooks, + newinfo, + newinfo->entries, + newinfo->entries + size, + hook_entries, underflows, &i); + if (ret != 0) + return ret; + + if (i != number) { + duprintf("translate_table: %u not %u entries\n", + i, number); + return -EINVAL; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_IP_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (newinfo->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + return -EINVAL; + } + if (newinfo->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + return -EINVAL; + } + } + + if (!mark_source_chains(newinfo, valid_hooks)) + return -ELOOP; + + /* Finally, each sanity check must pass */ + i = 0; + ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + check_entry, name, size, &i); + + if (ret != 0) { + IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, + cleanup_entry, &i); + return ret; + } + + /* And one copy for every other CPU */ + for (i = 1; i < smp_num_cpus; i++) { + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size*i), + newinfo->entries, + SMP_ALIGN(newinfo->size)); + } + + return ret; +} + +static struct ipt_table_info * +replace_table(struct ipt_table *table, + unsigned int num_counters, + struct ipt_table_info *newinfo, + int *error) +{ + struct ipt_table_info *oldinfo; + +#ifdef CONFIG_NETFILTER_DEBUG + { + struct ipt_entry *table_base; + unsigned int i; + + for (i = 0; i < smp_num_cpus; i++) { + table_base = + (void *)newinfo->entries + + TABLE_OFFSET(newinfo, i); + + table_base->comefrom = 0xdead57ac; + } + } +#endif + + /* Do the substitution. */ + write_lock_bh(&table->lock); + /* Check inside lock: is the old number correct? 
*/ + if (num_counters != table->private->number) { + duprintf("num_counters != table->private->number (%u/%u)\n", + num_counters, table->private->number); + write_unlock_bh(&table->lock); + *error = -EAGAIN; + return NULL; + } + oldinfo = table->private; + table->private = newinfo; + write_unlock_bh(&table->lock); + + return oldinfo; +} + +/* Gets counters. */ +static inline int +add_entry_to_counter(const struct ipt_entry *e, + struct ipt_counters total[], + unsigned int *i) +{ + ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + + (*i)++; + return 0; +} + +static void +get_counters(const struct ipt_table_info *t, + struct ipt_counters counters[]) +{ + unsigned int cpu; + unsigned int i; + + for (cpu = 0; cpu < smp_num_cpus; cpu++) { + i = 0; + IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + t->size, + add_entry_to_counter, + counters, + &i); + } +} + +static int +copy_entries_to_user(unsigned int total_size, + struct ipt_table *table, + void *userptr) +{ + unsigned int off, num, countersize; + struct ipt_entry *e; + struct ipt_counters *counters; + int ret = 0; + + /* We need atomic snapshot of counters: rest doesn't change + (other than comefrom, which userspace doesn't care + about). */ + countersize = sizeof(struct ipt_counters) * table->private->number; + counters = vmalloc(countersize); + + if (counters == NULL) + return -ENOMEM; + + /* First, sum counters... */ + memset(counters, 0, countersize); + write_lock_bh(&table->lock); + get_counters(table->private, counters); + write_unlock_bh(&table->lock); + + /* ... then copy entire thing from CPU 0... */ + if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + ret = -EFAULT; + goto free_counters; + } + + /* FIXME: use iterator macros --RR */ + /* ... 
then go back and fix counters and names */ + for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ + unsigned int i; + struct ipt_entry_match *m; + struct ipt_entry_target *t; + + e = (struct ipt_entry *)(table->private->entries + off); + if (copy_to_user(userptr + off + + offsetof(struct ipt_entry, counters), + &counters[num], + sizeof(counters[num])) != 0) { + ret = -EFAULT; + goto free_counters; + } + + for (i = sizeof(struct ipt_entry); + i < e->target_offset; + i += m->match_size) { + m = (void *)e + i; + + if (copy_to_user(userptr + off + i + + offsetof(struct ipt_entry_match, + u.name), + m->u.match->name, + strlen(m->u.match->name)+1) != 0) { + ret = -EFAULT; + goto free_counters; + } + } + + t = ipt_get_target(e); + if (copy_to_user(userptr + off + e->target_offset + + offsetof(struct ipt_entry_target, + u.name), + t->u.target->name, + strlen(t->u.target->name)+1) != 0) { + ret = -EFAULT; + goto free_counters; + } + } + + free_counters: + vfree(counters); + return ret; +} + +static int +get_entries(const struct ipt_get_entries *entries, + struct ipt_get_entries *uptr) +{ + int ret; + struct ipt_table *t; + + t = find_table_lock(entries->name, &ret, &ipt_mutex); + if (t) { + duprintf("t->private->number = %u\n", + t->private->number); + if (entries->size == t->private->size) + ret = copy_entries_to_user(t->private->size, + t, uptr->entries); + else { + duprintf("get_entries: I've got %u not %u!\n", + t->private->size, + entries->size); + ret = -EINVAL; + } + up(&ipt_mutex); + } else + duprintf("get_entries: Can't find %s!\n", + entries->name); + + return ret; +} + +static int +do_replace(void *user, unsigned int len) +{ + int ret; + struct ipt_replace tmp; + struct ipt_table *t; + struct ipt_table_info *newinfo, *oldinfo; + struct ipt_counters *counters; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + newinfo = vmalloc(sizeof(struct ipt_table_info) + + SMP_ALIGN(tmp.size) * smp_num_cpus); + if (!newinfo) + 
return -ENOMEM; + + if (copy_from_user(newinfo->entries, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters)); + if (!counters) { + ret = -ENOMEM; + goto free_newinfo; + } + memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters)); + + ret = translate_table(tmp.name, tmp.valid_hooks, + newinfo, tmp.size, tmp.num_entries, + tmp.hook_entry, tmp.underflow); + if (ret != 0) + goto free_newinfo_counters; + + duprintf("ip_tables: Translated table\n"); + + t = find_table_lock(tmp.name, &ret, &ipt_mutex); + if (!t) + goto free_newinfo_counters_untrans; + + /* You lied! */ + if (tmp.valid_hooks != t->valid_hooks) { + duprintf("Valid hook crap: %08X vs %08X\n", + tmp.valid_hooks, t->valid_hooks); + ret = -EINVAL; + goto free_newinfo_counters_untrans_unlock; + } + + oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); + if (!oldinfo) + goto free_newinfo_counters_untrans_unlock; + + /* Get the old counters. */ + get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ + IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); + vfree(oldinfo); + /* Silent error: too late now. */ + copy_to_user(tmp.counters, counters, + sizeof(struct ipt_counters) * tmp.num_counters); + + up(&ipt_mutex); + return 0; + + free_newinfo_counters_untrans_unlock: + up(&ipt_mutex); + free_newinfo_counters_untrans: + IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); + free_newinfo_counters: + vfree(counters); + free_newinfo: + vfree(newinfo); + return ret; +} + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static inline int +add_counter_to_entry(struct ipt_entry *e, + const struct ipt_counters addme[], + unsigned int *i) +{ +#if 0 + duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", + *i, + (long unsigned int)e->counters.pcnt, + (long unsigned int)e->counters.bcnt, + (long unsigned int)addme[*i].pcnt, + (long unsigned int)addme[*i].bcnt); +#endif + + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +static int +do_add_counters(void *user, unsigned int len) +{ + unsigned int i; + struct ipt_counters_info tmp, *paddc; + struct ipt_table *t; + int ret; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) + return -EINVAL; + + paddc = vmalloc(len); + if (!paddc) + return -ENOMEM; + + if (copy_from_user(paddc, user, len) != 0) { + ret = -EFAULT; + goto free; + } + + t = find_table_lock(tmp.name, &ret, &ipt_mutex); + if (!t) + goto free; + + write_lock_bh(&t->lock); + if (t->private->number != paddc->num_counters) { + ret = -EINVAL; + goto unlock_up_free; + } + + i = 0; + IPT_ENTRY_ITERATE(t->private->entries, + t->private->size, + add_counter_to_entry, + paddc->counters, + &i); + unlock_up_free: + write_unlock_bh(&t->lock); + up(&ipt_mutex); + free: + vfree(paddc); + + return ret; +} + +static int +do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_SET_REPLACE: + ret = do_replace(user, len); + break; + + case IPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(user, len); + break; + + default: + duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +static int +do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_GET_INFO: { + char name[IPT_TABLE_MAXNAMELEN]; + struct 
ipt_table *t; + + if (*len != sizeof(struct ipt_getinfo)) { + duprintf("length %u != %u\n", *len, + sizeof(struct ipt_getinfo)); + ret = -EINVAL; + break; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) { + ret = -EFAULT; + break; + } + t = find_table_lock(name, &ret, &ipt_mutex); + if (t) { + struct ipt_getinfo info; + + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, t->private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, t->private->underflow, + sizeof(info.underflow)); + info.num_entries = t->private->number; + info.size = t->private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + + up(&ipt_mutex); + } + } + break; + + case IPT_SO_GET_ENTRIES: { + struct ipt_get_entries get; + + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %u\n", *len, sizeof(get)); + ret = -EINVAL; + } else if (copy_from_user(&get, user, sizeof(get)) != 0) { + ret = -EFAULT; + } else if (*len != sizeof(struct ipt_get_entries) + get.size) { + duprintf("get_entries: %u != %u\n", *len, + sizeof(struct ipt_get_entries) + get.size); + ret = -EINVAL; + } else + ret = get_entries(&get, user); + break; + } + + default: + duprintf("do_ipt_get_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +/* Registration hooks for targets. 
*/ +int +ipt_register_target(struct ipt_target *target) +{ + int ret; + + ret = down_interruptible(&ipt_mutex); + if (ret != 0) + return ret; + + if (list_named_insert(&ipt_target, target)) { + MOD_INC_USE_COUNT; + ret = 0; + } else { + duprintf("ipt_register_target: `%s' already in list!\n", + target->name); + ret = -EINVAL; + } + up(&ipt_mutex); + return ret; +} + +void +ipt_unregister_target(struct ipt_target *target) +{ + down(&ipt_mutex); + LIST_DELETE(&ipt_target, target); + up(&ipt_mutex); + MOD_DEC_USE_COUNT; +} + +int +ipt_register_match(struct ipt_match *match) +{ + int ret; + + ret = down_interruptible(&ipt_mutex); + if (ret != 0) + return ret; + + if (list_named_insert(&ipt_match, match)) { + MOD_INC_USE_COUNT; + ret = 0; + } else { + duprintf("ipt_register_match: `%s' already in list!\n", + match->name); + ret = -EINVAL; + } + up(&ipt_mutex); + + return ret; +} + +void +ipt_unregister_match(struct ipt_match *match) +{ + down(&ipt_mutex); + LIST_DELETE(&ipt_match, match); + up(&ipt_mutex); + MOD_DEC_USE_COUNT; +} + +int ipt_register_table(struct ipt_table *table) +{ + int ret; + struct ipt_table_info *newinfo; + static struct ipt_table_info bootstrap + = { 0, 0, { 0 }, { 0 }, { }, { } }; + + newinfo = vmalloc(sizeof(struct ipt_table_info) + + SMP_ALIGN(table->table->size) * smp_num_cpus); + if (!newinfo) { + ret = -ENOMEM; + return ret; + } + memcpy(newinfo->entries, table->table->entries, table->table->size); + + ret = translate_table(table->name, table->valid_hooks, + newinfo, table->table->size, + table->table->num_entries, + table->table->hook_entry, + table->table->underflow); + if (ret != 0) { + vfree(newinfo); + return ret; + } + + ret = down_interruptible(&ipt_mutex); + if (ret != 0) { + vfree(newinfo); + return ret; + } + + /* Don't autoload: we'd eat our tail... */ + if (list_named_find(&ipt_tables, table->name)) { + ret = -EEXIST; + goto free_unlock; + } + + /* Simplifies replace_table code. 
*/ + table->private = &bootstrap; + if (!replace_table(table, 0, newinfo, &ret)) + goto free_unlock; + + duprintf("table->private->number = %u\n", + table->private->number); + + table->lock = RW_LOCK_UNLOCKED; + list_prepend(&ipt_tables, table); + MOD_INC_USE_COUNT; + + unlock: + up(&ipt_mutex); + return ret; + + free_unlock: + vfree(newinfo); + goto unlock; +} + +void ipt_unregister_table(struct ipt_table *table) +{ + down(&ipt_mutex); + LIST_DELETE(&ipt_tables, table); + up(&ipt_mutex); + + /* Decrease module usage counts and free resources */ + IPT_ENTRY_ITERATE(table->private->entries, table->private->size, + cleanup_entry, NULL); + vfree(table->private); + MOD_DEC_USE_COUNT; +} + +/* Returns 1 if the port is matched by the range, 0 otherwise */ +static inline int +port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert) +{ + int ret; + + ret = (port >= min && port <= max) ^ invert; + return ret; +} + +static int +tcp_find_option(u_int8_t option, + const struct tcphdr *tcp, + u_int16_t datalen, + int invert, + int *hotdrop) +{ + unsigned int i = sizeof(struct tcphdr); + const u_int8_t *opt = (u_int8_t *)tcp; + + duprintf("tcp_match: finding option\n"); + /* If we don't have the whole header, drop packet. */ + if (tcp->doff * 4 > datalen) { + *hotdrop = 1; + return 0; + } + + while (i < tcp->doff * 4) { + if (opt[i] == option) return !invert; + if (opt[i] < 2) i++; + else i += opt[i+1]?:1; + } + + return invert; +} + +static int +tcp_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct tcphdr *tcp = hdr; + const struct ipt_tcp *tcpinfo = matchinfo; + + /* To quote Alan: + + Don't allow a fragment of TCP 8 bytes in. Nobody normal + causes this. Its a cracker trying to break in by doing a + flag overwrite to pass the direction checks. 
+ */ + + if (offset == 1) { + duprintf("Dropping evil TCP offset=1 frag.\n"); + *hotdrop = 1; + return 0; + } else if (offset == 0 && datalen < sizeof(struct tcphdr)) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. */ + duprintf("Dropping evil TCP offset=0 tinygram.\n"); + *hotdrop = 1; + return 0; + } + + /* FIXME: Try tcp doff >> packet len against various stacks --RR */ + +#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg)) + + /* Must not be a fragment. */ + return !offset + && port_match(tcpinfo->spts[0], tcpinfo->spts[1], + ntohs(tcp->source), + !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)) + && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], + ntohs(tcp->dest), + !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)) + && FWINVTCP((((unsigned char *)tcp)[13] + & tcpinfo->flg_mask) + == tcpinfo->flg_cmp, + IPT_TCP_INV_FLAGS) + && (!tcpinfo->option + || tcp_find_option(tcpinfo->option, tcp, datalen, + tcpinfo->invflags + & IPT_TCP_INV_OPTION, + hotdrop)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +tcp_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_tcp *tcpinfo = matchinfo; + + /* Must specify proto == TCP, and no unknown invflags */ + return ip->proto == IPPROTO_TCP + && !(ip->invflags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp)) + && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK); +} + +static int +udp_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct udphdr *udp = hdr; + const struct ipt_udp *udpinfo = matchinfo; + + if (offset == 0 && datalen < sizeof(struct udphdr)) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. 
*/ + duprintf("Dropping evil UDP tinygram.\n"); + *hotdrop = 1; + return 0; + } + + /* Must not be a fragment. */ + return !offset + && port_match(udpinfo->spts[0], udpinfo->spts[1], + ntohs(udp->source), + !!(udpinfo->invflags & IPT_UDP_INV_SRCPT)) + && port_match(udpinfo->dpts[0], udpinfo->dpts[1], + ntohs(udp->dest), + !!(udpinfo->invflags & IPT_UDP_INV_DSTPT)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +udp_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) +{ + const struct ipt_udp *udpinfo = matchinfo; + + /* Must specify proto == UDP, and no unknown invflags */ + if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) { + duprintf("ipt_udp: Protocol %u != %u\n", ip->proto, + IPPROTO_UDP); + return 0; + } + if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) { + duprintf("ipt_udp: matchsize %u != %u\n", + matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp))); + return 0; + } + if (udpinfo->invflags & ~IPT_UDP_INV_MASK) { + duprintf("ipt_udp: unknown flags %X\n", + udpinfo->invflags); + return 0; + } + + return 1; +} + +/* Returns 1 if the type and code is matched by the range, 0 otherwise */ +static inline int +icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, + u_int8_t type, u_int8_t code, + int invert) +{ + return (type == test_type && code >= min_code && code <= max_code) + ^ invert; +} + +static int +icmp_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct icmphdr *icmp = hdr; + const struct ipt_icmp *icmpinfo = matchinfo; + + if (offset == 0 && datalen < 2) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. 
*/ + duprintf("Dropping evil ICMP tinygram.\n"); + *hotdrop = 1; + return 0; + } + + /* Must not be a fragment. */ + return !offset + && icmp_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + icmp->type, icmp->code, + !!(icmpinfo->invflags&IPT_ICMP_INV)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +icmp_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_icmp *icmpinfo = matchinfo; + + /* Must specify proto == ICMP, and no unknown invflags */ + return ip->proto == IPPROTO_ICMP + && !(ip->invflags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp)) + && !(icmpinfo->invflags & ~IPT_ICMP_INV); +} + +/* The built-in targets: standard (NULL) and error. */ +static struct ipt_target ipt_standard_target += { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL }; +static struct ipt_target ipt_error_target += { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL }; + +static struct nf_sockopt_ops ipt_sockopts += { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl, + IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL }; + +static struct ipt_match tcp_matchstruct += { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL }; +static struct ipt_match udp_matchstruct += { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL }; +static struct ipt_match icmp_matchstruct += { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL }; + +static int __init init(void) +{ + int ret; + + /* Noone else will be downing sem now, so we won't sleep */ + down(&ipt_mutex); + list_append(&ipt_target, &ipt_standard_target); + list_append(&ipt_target, &ipt_error_target); + list_append(&ipt_match, &tcp_matchstruct); + list_append(&ipt_match, &udp_matchstruct); + list_append(&ipt_match, &icmp_matchstruct); + up(&ipt_mutex); + + /* Register setsockopt */ + ret = 
nf_register_sockopt(&ipt_sockopts); + if (ret < 0) { + duprintf("Unable to register sockopts.\n"); + return ret; + } + + printk("iptables: (c)2000 Netfilter core team\n"); + return 0; +} + +static void __exit fini(void) +{ + nf_unregister_sockopt(&ipt_sockopts); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipchains_core.c b/net/ipv4/netfilter/ipchains_core.c new file mode 100644 index 000000000000..02bd7ad839df --- /dev/null +++ b/net/ipv4/netfilter/ipchains_core.c @@ -0,0 +1,1768 @@ +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +/* + * This code is heavily based on the code on the old ip_fw.c code; see below for + * copyrights and attributions of the old code. This code is basically GPL. + * + * 15-Aug-1997: Major changes to allow graphs for firewall rules. + * Paul Russell and + * Michael Neuling + * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP). + * Added explicit RETURN from chains. + * Removed TOS mangling (done in ipchains 1.0.1). + * Fixed read & reset bug by reworking proc handling. + * Paul Russell + * 28-Sep-1997: Added packet marking for net sched code. + * Removed fw_via comparisons: all done on device name now, + * similar to changes in ip_fw.c in DaveM's CVS970924 tree. + * Paul Russell + * 2-Nov-1997: Moved types across to __u16, etc. + * Added inverse flags. + * Fixed fragment bug (in args to port_match). + * Changed mark to only one flag (MARKABS). + * 21-Nov-1997: Added ability to test ICMP code. + * 19-Jan-1998: Added wildcard interfaces. + * 6-Feb-1998: Merged 2.0 and 2.1 versions. + * Initialised ip_masq for 2.0.x version. + * Added explicit NETLINK option for 2.1.x version. + * Added packet and byte counters for policy matches. + * 26-Feb-1998: Fixed race conditions, added SMP support. + * 18-Mar-1998: Fix SMP, fix race condition fix. + * 1-May-1998: Remove caching of device pointer. + * 12-May-1998: Allow tiny fragment case for TCP/UDP. 
+ * 15-May-1998: Treat short packets as fragments, don't just block. + * 3-Jan-1999: Fixed serious procfs security hole -- users should never + * be allowed to view the chains! + * Marc Santoro + * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash + * during dump_packet. --RR. + * 19-May-1999: Star Wars: The Phantom Menace opened. Rule num + * printed in log (modified from Michael Hasenstein's patch). + * Added SYN in log message. --RR + * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998. + * John McDonald + * Thomas Lopatic + */ + +/* + * + * The origina Linux port was done Alan Cox, with changes/fixes from + * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan + * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others. + * + * Copyright from the original FreeBSD version follows: + * + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Understanding locking in this code: (thanks to Alan Cox for using + * little words to explain this to me). -- PR + * + * In UP, there can be two packets traversing the chains: + * 1) A packet from the current userspace context + * 2) A packet off the bh handlers (timer or net). + * + * For SMP (kernel v2.1+), multiply this by # CPUs. 
[Note that this is not correct
*/ + +/*#define DEBUG_IP_FIREWALL*/ +/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ +/*#define DEBUG_IP_FIREWALL_USER*/ +/*#define DEBUG_IP_FIREWALL_LOCKING*/ + +static struct sock *ipfwsk; + +#ifdef CONFIG_SMP +#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt()) +#else /* !SMP */ +#define SLOT_NUMBER() (!in_interrupt()) +#endif /* CONFIG_SMP */ +#define NUM_SLOTS (smp_num_cpus*2) + +#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \ + + NUM_SLOTS*sizeof(struct ip_reent)) +#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \ + + NUM_SLOTS*sizeof(struct ip_counters)) + +#ifdef DEBUG_IP_FIREWALL_LOCKING +static unsigned int fwc_rlocks, fwc_wlocks; +#define FWC_DEBUG_LOCK(d) \ +do { \ + FWC_DONT_HAVE_LOCK(d); \ + d |= (1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DEBUG_UNLOCK(d) \ +do { \ + FWC_HAVE_LOCK(d); \ + d &= ~(1 << SLOT_NUMBER()); \ +} while (0) + +#define FWC_DONT_HAVE_LOCK(d) \ +do { \ + if ((d) & (1 << SLOT_NUMBER())) \ + printk("%s:%i: Got lock on %i already!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while(0) + +#define FWC_HAVE_LOCK(d) \ +do { \ + if (!((d) & (1 << SLOT_NUMBER()))) \ + printk("%s:%i:No lock on %i!\n", \ + __FILE__, __LINE__, SLOT_NUMBER()); \ +} while (0) + +#else +#define FWC_DEBUG_LOCK(d) do { } while(0) +#define FWC_DEBUG_UNLOCK(d) do { } while(0) +#define FWC_DONT_HAVE_LOCK(d) do { } while(0) +#define FWC_HAVE_LOCK(d) do { } while(0) +#endif /*DEBUG_IP_FIRWALL_LOCKING*/ + +#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0) +#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0) +#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0) +#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0) +#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0) +#define FWC_WRITE_UNLOCK(l) do 
{ FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0) +#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0) +#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0) + +struct ip_chain; + +struct ip_counters +{ + __u64 pcnt, bcnt; /* Packet and byte counters */ +}; + +struct ip_fwkernel +{ + struct ip_fw ipfw; + struct ip_fwkernel *next; /* where to go next if current + * rule doesn't match */ + struct ip_chain *branch; /* which branch to jump to if + * current rule matches */ + int simplebranch; /* Use this if branch == NULL */ + struct ip_counters counters[0]; /* Actually several of these */ +}; + +struct ip_reent +{ + struct ip_chain *prevchain; /* Pointer to referencing chain */ + struct ip_fwkernel *prevrule; /* Pointer to referencing rule */ + struct ip_counters counters; +}; + +struct ip_chain +{ + ip_chainlabel label; /* Defines the label for each block */ + struct ip_chain *next; /* Pointer to next block */ + struct ip_fwkernel *chain; /* Pointer to first rule in block */ + __u32 refcount; /* Number of refernces to block */ + int policy; /* Default rule for chain. Only * + * used in built in chains */ + struct ip_reent reent[0]; /* Actually several of these */ +}; + +/* + * Implement IP packet firewall + */ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf(format, args...) printk(format , ## args) +#else +#define dprintf(format, args...) +#endif + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) 
+#endif + +/* Lock around ip_fw_chains linked list structure */ +rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED; + +/* Head of linked list of fw rules */ +static struct ip_chain *ip_fw_chains; + +#define IP_FW_INPUT_CHAIN ip_fw_chains +#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next) +#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next) + +/* Returns 1 if the port is matched by the range, 0 otherwise */ +extern inline int port_match(__u16 min, __u16 max, __u16 port, + int frag, int invert) +{ + if (frag) /* Fragments fail ANY port test. */ + return (min == 0 && max == 0xFFFF); + else return (port >= min && port <= max) ^ invert; +} + +/* Returns whether matches rule or not. */ +static int ip_rule_match(struct ip_fwkernel *f, + const char *ifname, + struct iphdr *ip, + char tcpsyn, + __u16 src_port, __u16 dst_port, + char isfrag) +{ +#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg)) + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ + + if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr, + IP_FW_INV_SRCIP) + || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr, + IP_FW_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + + dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, + f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : ""); + dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, + f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr, + f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : ""); + return 0; + } + + /* + * Look for a VIA device match + */ + if (f->ipfw.fw_flg & IP_FW_F_WILDIF) { + if (FWINV(strncmp(ifname, f->ipfw.fw_vianame, + strlen(f->ipfw.fw_vianame)) != 0, + IP_FW_INV_VIA)) { + dprintf("Wildcard interface mismatch.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA ? 
" (INV)" : ""); + return 0; /* Mismatch */ + } + } + else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0, + IP_FW_INV_VIA)) { + dprintf("Interface name does not match.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_VIA + ? " (INV)" : ""); + return 0; /* Mismatch */ + } + + /* + * Ok the chain addresses match. + */ + + /* If we have a fragment rule but the packet is not a fragment + * the we return zero */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) { + dprintf("Fragment rule but not fragment.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : ""); + return 0; + } + + /* Fragment NEVER passes a SYN test, even an inverted one. */ + if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN) + || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) { + dprintf("Rule requires SYN and packet has no SYN.%s\n", + f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : ""); + return 0; + } + + if (f->ipfw.fw_proto) { + /* + * Specific firewall - packet's protocol + * must match firewall's. + */ + + if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) { + dprintf("Packet protocol %hi does not match %hi.%s\n", + ip->protocol, f->ipfw.fw_proto, + f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":""); + return 0; + } + + /* For non TCP/UDP/ICMP, port range is max anyway. 
*/ + if (!port_match(f->ipfw.fw_spts[0], + f->ipfw.fw_spts[1], + src_port, isfrag, + !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT)) + || !port_match(f->ipfw.fw_dpts[0], + f->ipfw.fw_dpts[1], + dst_port, isfrag, + !!(f->ipfw.fw_invflg + &IP_FW_INV_DSTPT))) { + dprintf("Port match failed.\n"); + return 0; + } + } + + dprintf("Match succeeded.\n"); + return 1; +} + +static const char *branchname(struct ip_chain *branch,int simplebranch) +{ + if (branch) + return branch->label; + switch (simplebranch) + { + case FW_BLOCK: return IP_FW_LABEL_BLOCK; + case FW_ACCEPT: return IP_FW_LABEL_ACCEPT; + case FW_REJECT: return IP_FW_LABEL_REJECT; + case FW_REDIRECT: return IP_FW_LABEL_REDIRECT; + case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE; + case FW_SKIP: return "-"; + case FW_SKIP+1: return IP_FW_LABEL_RETURN; + default: + return "UNKNOWN"; + } +} + +/* + * VERY ugly piece of code which actually + * makes kernel printf for matching packets... + */ +static void dump_packet(const struct iphdr *ip, + const char *ifname, + struct ip_fwkernel *f, + const ip_chainlabel chainlabel, + __u16 src_port, + __u16 dst_port, + unsigned int count, + int syn) +{ + __u32 *opt = (__u32 *) (ip + 1); + int opti; + + if (f) { + printk(KERN_INFO "Packet log: %s ",chainlabel); + printk("%s ",branchname(f->branch,f->simplebranch)); + if (f->simplebranch==FW_REDIRECT) + printk("%d ",f->ipfw.fw_redirpt); + } + + printk("%s PROTO=%d %d.%d.%d.%d:%hu %d.%d.%d.%d:%hu" + " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", + ifname, ip->protocol, + (ntohl(ip->saddr)>>24)&0xFF, + (ntohl(ip->saddr)>>16)&0xFF, + (ntohl(ip->saddr)>>8)&0xFF, + (ntohl(ip->saddr))&0xFF, + src_port, + (ntohl(ip->daddr)>>24)&0xFF, + (ntohl(ip->daddr)>>16)&0xFF, + (ntohl(ip->daddr)>>8)&0xFF, + (ntohl(ip->daddr))&0xFF, + dst_port, + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + ntohs(ip->frag_off), ip->ttl); + + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk(" %s(#%d)\n", syn ? 
"SYN " : /* "PENANCE" */ "", count); +} + +/* function for checking chain labels for user space. */ +static int check_label(ip_chainlabel label) +{ + unsigned int i; + /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */ + for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++) + if (label[i] == '\0') return 1; + + return 0; +} + +/* This function returns a pointer to the first chain with a label + * that matches the one given. */ +static struct ip_chain *find_label(ip_chainlabel label) +{ + struct ip_chain *tmp; + FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks); + for (tmp = ip_fw_chains; tmp; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + break; + return tmp; +} + +/* This function returns a boolean which when true sets answer to one + of the FW_*. */ +static int find_special(ip_chainlabel label, int *answer) +{ + if (label[0] == '\0') { + *answer = FW_SKIP; /* => pass-through rule */ + return 1; + } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) { + *answer = FW_ACCEPT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) { + *answer = FW_BLOCK; + return 1; + } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) { + *answer = FW_REJECT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) { + *answer = FW_REDIRECT; + return 1; + } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) { + *answer = FW_MASQUERADE; + return 1; + } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) { + *answer = FW_SKIP+1; + return 1; + } else { + return 0; + } +} + +/* This function cleans up the prevchain and prevrule. If the verbose + * flag is set then he names of the chains will be printed as it + * cleans up. 
*/ +static void cleanup(struct ip_chain *chain, + const int verbose, + unsigned int slot) +{ + struct ip_chain *tmpchain = chain->reent[slot].prevchain; + if (verbose) + printk(KERN_ERR "Chain backtrace: "); + while (tmpchain) { + if (verbose) + printk("%s<-",chain->label); + chain->reent[slot].prevchain = NULL; + chain = tmpchain; + tmpchain = chain->reent[slot].prevchain; + } + if (verbose) + printk("%s\n",chain->label); +} + +static inline int +ip_fw_domatch(struct ip_fwkernel *f, + struct iphdr *ip, + const char *rif, + const ip_chainlabel label, + struct sk_buff *skb, + unsigned int slot, + __u16 src_port, __u16 dst_port, + unsigned int count, + int tcpsyn) +{ + f->counters[slot].bcnt+=ntohs(ip->tot_len); + f->counters[slot].pcnt++; + if (f->ipfw.fw_flg & IP_FW_F_PRN) { + dump_packet(ip,rif,f,label,src_port,dst_port,count,tcpsyn); + } + ip->tos = (ip->tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor; + +/* This functionality is useless in stock 2.0.x series, but we don't + * discard the mark thing altogether, to avoid breaking ipchains (and, + * more importantly, the ipfwadm wrapper) --PR */ + if (f->ipfw.fw_flg & IP_FW_F_MARKABS) { + skb->nfmark = f->ipfw.fw_mark; + } else { + skb->nfmark += f->ipfw.fw_mark; + } + if (f->ipfw.fw_flg & IP_FW_F_NETLINK) { +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len)) + + sizeof(__u32) + sizeof(skb->nfmark) + IFNAMSIZ; + struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC); + + duprintf("Sending packet out NETLINK (length = %u).\n", + (unsigned int)len); + if (outskb) { + /* Prepend length, mark & interface */ + skb_put(outskb, len); + *((__u32 *)outskb->data) = (__u32)len; + *((__u32 *)(outskb->data+sizeof(__u32))) = skb->nfmark; + strcpy(outskb->data+sizeof(__u32)*2, rif); + memcpy(outskb->data+sizeof(__u32)*2+IFNAMSIZ, ip, + len-(sizeof(__u32)*2+IFNAMSIZ)); + netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL); + } + else { +#endif + if 
(net_ratelimit()) + printk(KERN_WARNING "ip_fw: packet drop due to " + "netlink failure\n"); + return 0; +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + } +#endif + } + return 1; +} + +/* + * Returns one of the generic firewall policies, like FW_ACCEPT. + * + * The testing is either false for normal firewall mode or true for + * user checking mode (counters are not updated, TOS & mark not done). + */ +static int +ip_fw_check(struct iphdr *ip, + const char *rif, + __u16 *redirport, + struct ip_chain *chain, + struct sk_buff *skb, + unsigned int slot, + int testing) +{ + struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); + struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl); + struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl); + __u32 src, dst; + __u16 src_port = 0xFFFF, dst_port = 0xFFFF; + char tcpsyn=0; + __u16 offset; + unsigned char oldtos; + struct ip_fwkernel *f; + int ret = FW_SKIP+2; + unsigned int count; + + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask + * things we don't know, ie. tcp syn flag or ports). If the + * rule is also a fragment-specific rule, non-fragments won't + * match it. */ + + offset = ntohs(ip->frag_off) & IP_OFFSET; + + /* + * Don't allow a fragment of TCP 8 bytes in. Nobody + * normal causes this. Its a cracker trying to break + * in by doing a flag overwrite to pass the direction + * checks. + */ + if (offset == 1 && ip->protocol == IPPROTO_TCP) { + if (!testing && net_ratelimit()) { + printk("Suspect TCP fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + + /* If we can't investigate ports, treat as fragment. It's + * either a trucated whole packet, or a truncated first + * fragment, or a TCP first fragment of length 8-15, in which + * case the above rule stops reassembly. 
+ */ + if (offset == 0) { + unsigned int size_req; + switch (ip->protocol) { + case IPPROTO_TCP: + /* Don't care about things past flags word */ + size_req = 16; + break; + + case IPPROTO_UDP: + case IPPROTO_ICMP: + size_req = 8; + break; + + default: + size_req = 0; + } + + /* If it is a truncated first fragment then it can be + * used to rewrite port information, and thus should + * be blocked. + */ + if (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req) { + if (!testing && net_ratelimit()) { + printk("Suspect short first fragment.\n"); + dump_packet(ip,rif,NULL,NULL,0,0,0,0); + } + return FW_BLOCK; + } + } + + src = ip->saddr; + dst = ip->daddr; + oldtos = ip->tos; + + /* + * If we got interface from which packet came + * we can use the address directly. Linux 2.1 now uses address + * chains per device too, but unlike BSD we first check if the + * incoming packet matches a device address and the routing + * table before calling the firewall. + */ + + dprintf("Packet "); + switch(ip->protocol) + { + case IPPROTO_TCP: + dprintf("TCP "); + if (!offset) { + src_port=ntohs(tcp->source); + dst_port=ntohs(tcp->dest); + + /* Connection initilisation can only + * be made when the syn bit is set and + * neither of the ack or reset is + * set. 
*/ + if(tcp->syn && !(tcp->ack || tcp->rst)) + tcpsyn=1; + } + break; + case IPPROTO_UDP: + dprintf("UDP "); + if (!offset) { + src_port=ntohs(udp->source); + dst_port=ntohs(udp->dest); + } + break; + case IPPROTO_ICMP: + if (!offset) { + src_port=(__u16)icmp->type; + dst_port=(__u16)icmp->code; + } + dprintf("ICMP "); + break; + default: + dprintf("p=%d ",ip->protocol); + break; + } +#ifdef DEBUG_IP_FIREWALL + print_ip(ip->saddr); + + if (offset) + dprintf(":fragment (%i) ", ((int)offset)<<2); + else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP + || ip->protocol==IPPROTO_ICMP) + dprintf(":%hu:%hu", src_port, dst_port); + dprintf("\n"); +#endif + + if (!testing) FWC_READ_LOCK(&ip_fw_lock); + else FWC_HAVE_LOCK(fwc_rlocks); + + f = chain->chain; + do { + count = 0; + for (; f; f = f->next) { + count++; + if (ip_rule_match(f,rif,ip, + tcpsyn,src_port,dst_port,offset)) { + if (!testing + && !ip_fw_domatch(f, ip, rif, chain->label, + skb, slot, + src_port, dst_port, + count, tcpsyn)) { + ret = FW_BLOCK; + goto out; + } + break; + } + } + if (f) { + if (f->branch) { + /* Do sanity check to see if we have + * already set prevchain and if so we + * must be in a loop */ + if (f->branch->reent[slot].prevchain) { + if (!testing) { + printk(KERN_ERR + "IP firewall: " + "Loop detected " + "at `%s'.\n", + f->branch->label); + cleanup(chain, 1, slot); + ret = FW_BLOCK; + } else { + cleanup(chain, 0, slot); + ret = FW_SKIP+1; + } + } + else { + f->branch->reent[slot].prevchain + = chain; + f->branch->reent[slot].prevrule + = f->next; + chain = f->branch; + f = chain->chain; + } + } + else if (f->simplebranch == FW_SKIP) + f = f->next; + else if (f->simplebranch == FW_SKIP+1) { + /* Just like falling off the chain */ + goto fall_off_chain; + } else { + cleanup(chain, 0, slot); + ret = f->simplebranch; + } + } /* f == NULL */ + else { + fall_off_chain: + if (chain->reent[slot].prevchain) { + struct ip_chain *tmp = chain; + f = chain->reent[slot].prevrule; + chain = 
chain->reent[slot].prevchain; + tmp->reent[slot].prevchain = NULL; + } + else { + ret = chain->policy; + if (!testing) { + chain->reent[slot].counters.pcnt++; + chain->reent[slot].counters.bcnt + += ntohs(ip->tot_len); + } + } + } + } while (ret == FW_SKIP+2); + + out: + if (!testing) FWC_READ_UNLOCK(&ip_fw_lock); + + /* Recalculate checksum if not going to reject, and TOS changed. */ + if (ip->tos != oldtos + && ret != FW_REJECT && ret != FW_BLOCK + && !testing) + ip_send_check(ip); + + if (ret == FW_REDIRECT && redirport) { + if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) { + /* Wildcard redirection. + * Note that redirport will become + * 0xFFFF for non-TCP/UDP packets. + */ + *redirport = htons(dst_port); + } + } + +#ifdef DEBUG_ALLOW_ALL + return (testing ? ret : FW_ACCEPT); +#else + return ret; +#endif +} + +/* Must have write lock & interrupts off for any of these */ + +/* This function sets all the byte counters in a chain to zero. The + * input is a pointer to the chain required for zeroing */ +static int zero_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = chainptr->chain; i; i = i->next) + memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + return 0; +} + +static int clear_fw_chain(struct ip_chain *chainptr) +{ + struct ip_fwkernel *i= chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->chain=NULL; + + while (i) { + struct ip_fwkernel *tmp = i->next; + if (i->branch) + i->branch->refcount--; + kfree(i); + i = tmp; + } + return 0; +} + +static int replace_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + + if (f->branch) f->branch->refcount--; + if (frwl->branch) frwl->branch->refcount++; + + frwl->next = f->next; + memcpy(f,frwl,sizeof(struct ip_fwkernel)); + kfree(frwl); + return 0; 
+} + +static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule) +{ + struct ip_fwkernel *i; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Special case if no rules already present */ + if (chainptr->chain == NULL) { + + /* If pointer writes are atomic then turning off + * interupts is not necessary. */ + chainptr->chain = rule; + if (rule->branch) rule->branch->refcount++; + return 0; + } + + /* Find the rule before the end of the chain */ + for (i = chainptr->chain; i->next; i = i->next); + i->next = rule; + if (rule->branch) rule->branch->refcount++; + return 0; +} + +/* This function inserts a rule at the position of position in the + * chain refenced by chainptr. If position is 1 then this rule will + * become the new rule one. */ +static int insert_in_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl, + __u32 position) +{ + struct ip_fwkernel *f = chainptr->chain; + + FWC_HAVE_LOCK(fwc_wlocks); + /* special case if the position is number 1 */ + if (position == 1) { + frwl->next = chainptr->chain; + if (frwl->branch) frwl->branch->refcount++; + chainptr->chain = frwl; + return 0; + } + position--; + while (--position && f != NULL) f = f->next; + if (f == NULL) + return EINVAL; + if (frwl->branch) frwl->branch->refcount++; + frwl->next = f->next; + + f->next = frwl; + return 0; +} + +/* This function deletes the a rule from a given rulenum and chain. + * With rulenum = 1 is the first rule is deleted. 
*/ + +static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum) +{ + struct ip_fwkernel *i=chainptr->chain,*tmp; + + FWC_HAVE_LOCK(fwc_wlocks); + + if (!chainptr->chain) + return ENOENT; + + /* Need a special case for the first rule */ + if (rulenum == 1) { + /* store temp to allow for freeing up of memory */ + tmp = chainptr->chain; + if (chainptr->chain->branch) chainptr->chain->branch->refcount--; + chainptr->chain = chainptr->chain->next; + kfree(tmp); /* free memory that is now unused */ + } else { + rulenum--; + while (--rulenum && i->next ) i = i->next; + if (!i->next) + return ENOENT; + tmp = i->next; + if (i->next->branch) + i->next->branch->refcount--; + i->next = i->next->next; + kfree(tmp); + } + return 0; +} + + +/* This function deletes the a rule from a given rule and chain. + * The rule that is deleted is the first occursance of that rule. */ +static int del_rule_from_chain(struct ip_chain *chainptr, + struct ip_fwkernel *frwl) +{ + struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ; + int was_found; + + FWC_HAVE_LOCK(fwc_wlocks); + + /* Sure, we should compare marks, but since the `ipfwadm' + * script uses it for an unholy hack... well, life is easier + * this way. We also mask it out of the flags word. 
--PR */ + for (ltmp=NULL, was_found=0; + !was_found && ftmp != NULL; + ltmp = ftmp,ftmp = ftmp->next) { + if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr + || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr + || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr + || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr +#if 0 + || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg +#else + || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) + != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS)) +#endif + || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg + || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto +#if 0 + || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark +#endif + || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt + || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0] + || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1] + || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0] + || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1] + || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) { + duprintf("del_rule_from_chain: mismatch:" + "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u " + "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u " + "mark:%u/%u " + "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu " + "outputsize:%hu-%hu\n", + ftmp->ipfw.fw_src.s_addr, + frwl->ipfw.fw_src.s_addr, + ftmp->ipfw.fw_dst.s_addr, + frwl->ipfw.fw_dst.s_addr, + ftmp->ipfw.fw_smsk.s_addr, + frwl->ipfw.fw_smsk.s_addr, + ftmp->ipfw.fw_dmsk.s_addr, + frwl->ipfw.fw_dmsk.s_addr, + ftmp->ipfw.fw_flg, + frwl->ipfw.fw_flg, + ftmp->ipfw.fw_invflg, + frwl->ipfw.fw_invflg, + ftmp->ipfw.fw_proto, + frwl->ipfw.fw_proto, + ftmp->ipfw.fw_mark, + frwl->ipfw.fw_mark, + ftmp->ipfw.fw_spts[0], + frwl->ipfw.fw_spts[0], + ftmp->ipfw.fw_spts[1], + frwl->ipfw.fw_spts[1], + ftmp->ipfw.fw_dpts[0], + frwl->ipfw.fw_dpts[0], + ftmp->ipfw.fw_dpts[1], + frwl->ipfw.fw_dpts[1], + ftmp->ipfw.fw_outputsize, + frwl->ipfw.fw_outputsize); + continue; + } + + if (strncmp(ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame, + IFNAMSIZ)) { + duprintf("del_rule_from_chain: if mismatch: %s/%s\n", + 
ftmp->ipfw.fw_vianame, + frwl->ipfw.fw_vianame); + continue; + } + if (ftmp->branch != frwl->branch) { + duprintf("del_rule_from_chain: branch mismatch: " + "%s/%s\n", + ftmp->branch?ftmp->branch->label:"(null)", + frwl->branch?frwl->branch->label:"(null)"); + continue; + } + if (ftmp->branch == NULL + && ftmp->simplebranch != frwl->simplebranch) { + duprintf("del_rule_from_chain: simplebranch mismatch: " + "%i/%i\n", + ftmp->simplebranch, frwl->simplebranch); + continue; + } + was_found = 1; + if (ftmp->branch) + ftmp->branch->refcount--; + if (ltmp) + ltmp->next = ftmp->next; + else + chainptr->chain = ftmp->next; + kfree(ftmp); + break; + } + + if (was_found) + return 0; + else { + duprintf("del_rule_from_chain: no matching rule found\n"); + return EINVAL; + } +} + +/* This function takes the label of a chain and deletes the first + * chain with that name. No special cases required for the built in + * chains as they have their refcount initilised to 1 so that they are + * never deleted. */ +static int del_chain(ip_chainlabel label) +{ + struct ip_chain *tmp,*tmp2; + + FWC_HAVE_LOCK(fwc_wlocks); + /* Corner case: return EBUSY not ENOENT for first elem ("input") */ + if (strcmp(label, ip_fw_chains->label) == 0) + return EBUSY; + + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if(strcmp(tmp->next->label,label) == 0) + break; + + tmp2 = tmp->next; + if (!tmp2) + return ENOENT; + + if (tmp2->refcount) + return EBUSY; + + if (tmp2->chain) + return ENOTEMPTY; + + tmp->next = tmp2->next; + kfree(tmp2); + return 0; +} + +/* This is a function to initilise a chain. Built in rules start with + * refcount = 1 so that they cannot be deleted. User defined rules + * start with refcount = 0 so they can be deleted. 
*/ +static struct ip_chain *ip_init_chain(ip_chainlabel name, + __u32 ref, + int policy) +{ + unsigned int i; + struct ip_chain *label + = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL); + if (label == NULL) + panic("Can't kmalloc for firewall chains.\n"); + strcpy(label->label,name); + label->next = NULL; + label->chain = NULL; + label->refcount = ref; + label->policy = policy; + for (i = 0; i < smp_num_cpus*2; i++) { + label->reent[i].counters.pcnt = label->reent[i].counters.bcnt + = 0; + label->reent[i].prevchain = NULL; + label->reent[i].prevrule = NULL; + } + + return label; +} + +/* This is a function for reating a new chain. The chains is not + * created if a chain of the same name already exists */ +static int create_chain(ip_chainlabel label) +{ + struct ip_chain *tmp; + + if (!check_label(label)) + return EINVAL; + + FWC_HAVE_LOCK(fwc_wlocks); + for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next) + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + if (strcmp(tmp->label,label) == 0) + return EEXIST; + + tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is + * zero since this is a + * user defined chain * + * and therefore can be + * deleted */ + return 0; +} + +/* This function simply changes the policy on one of the built in + * chains. checking must be done before this is call to ensure that + * chainptr is pointing to one of the three possible chains */ +static int change_policy(struct ip_chain *chainptr, int policy) +{ + FWC_HAVE_LOCK(fwc_wlocks); + chainptr->policy = policy; + return 0; +} + +/* This function takes an ip_fwuser and converts it to a ip_fwkernel. It also + * performs some checks in the structure. 
*/ +static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno) +{ + struct ip_fwkernel *fwkern; + + if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) { + duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n", + fwuser->ipfw.fw_flg); + *errno = EINVAL; + return NULL; + } + +#ifdef DEBUG_IP_FIREWALL_USER + /* These are sanity checks that don't really matter. + * We can get rid of these once testing is complete. + */ + if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || fwuser->ipfw.fw_proto != IPPROTO_TCP)) { + duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n"); + *errno = EINVAL; + return NULL; + } + + if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0 + && fwuser->ipfw.fw_redirpt != 0) { + duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n"); + *errno = EINVAL; + return NULL; + } + + if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)) + || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) + && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) { + duprintf("convert_ipfw: Can't have INV flag if flag unset!\n"); + *errno = EINVAL; + return NULL; + } + + if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT) + && fwuser->ipfw.fw_spts[0] == 0 + && fwuser->ipfw.fw_spts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT) + && fwuser->ipfw.fw_dpts[0] == 0 + && fwuser->ipfw.fw_dpts[1] == 0xFFFF) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA) + && (fwuser->ipfw.fw_vianame)[0] == '\0') + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP) + && fwuser->ipfw.fw_smsk.s_addr == 0) + || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP) + && fwuser->ipfw.fw_dmsk.s_addr == 0)) { + duprintf("convert_ipfw: INV flag makes rule unmatchable!\n"); + *errno = EINVAL; + return NULL; + } + + if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG) + && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG) + && (fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] 
!= 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF + || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) { + duprintf("convert_ipfw: Can't test ports or SYN with frag!\n"); + *errno = EINVAL; + return NULL; + } +#endif + + if ((fwuser->ipfw.fw_spts[0] != 0 + || fwuser->ipfw.fw_spts[1] != 0xFFFF + || fwuser->ipfw.fw_dpts[0] != 0 + || fwuser->ipfw.fw_dpts[1] != 0xFFFF) + && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO) + || (fwuser->ipfw.fw_proto != IPPROTO_TCP + && fwuser->ipfw.fw_proto != IPPROTO_UDP + && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) { + duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n"); + *errno = EINVAL; + return NULL; + } + + fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL); + if (!fwkern) { + duprintf("convert_ipfw: kmalloc failed!\n"); + *errno = ENOMEM; + return NULL; + } + memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw)); + + if (!find_special(fwuser->label, &fwkern->simplebranch)) { + fwkern->branch = find_label(fwuser->label); + if (!fwkern->branch) { + duprintf("convert_ipfw: chain doesn't exist `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = ENOENT; + return NULL; + } else if (fwkern->branch == IP_FW_INPUT_CHAIN + || fwkern->branch == IP_FW_FORWARD_CHAIN + || fwkern->branch == IP_FW_OUTPUT_CHAIN) { + duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n", + fwuser->label); + kfree(fwkern); + *errno = ENOENT; + return NULL; + } + } else + fwkern->branch = NULL; + memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS); + + /* Handle empty vianame by making it a wildcard */ + if ((fwkern->ipfw.fw_vianame)[0] == '\0') + fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF; + + fwkern->next = NULL; + return fwkern; +} + +int ip_fw_ctl(int cmd, void *m, int len) +{ + int ret; + struct ip_chain *chain; + unsigned long flags; + + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + switch (cmd) { + case IP_FW_FLUSH: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) + 
ret = ENOENT; + else ret = clear_fw_chain(chain); + break; + + case IP_FW_ZERO: + if (len != sizeof(ip_chainlabel) || !check_label(m)) + ret = EINVAL; + else if ((chain = find_label(m)) == NULL) + ret = ENOENT; + else ret = zero_fw_chain(chain); + break; + + case IP_FW_CHECK: { + struct ip_fwtest *new = m; + struct iphdr *ip; + + /* Don't need write lock. */ + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + if (len != sizeof(struct ip_fwtest) || !check_label(m)) + return EINVAL; + + /* Need readlock to do find_label */ + FWC_READ_LOCK(&ip_fw_lock); + + if ((chain = find_label(new->fwt_label)) == NULL) + ret = ENOENT; + else { + ip = &(new->fwt_packet.fwp_iph); + + if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) { + duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n", + ip->ihl, + sizeof(struct iphdr) / sizeof(int)); + ret = EINVAL; + } + else { + ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame, + NULL, chain, + NULL, SLOT_NUMBER(), 1); + switch (ret) { + case FW_ACCEPT: + ret = 0; break; + case FW_REDIRECT: + ret = ECONNABORTED; break; + case FW_MASQUERADE: + ret = ECONNRESET; break; + case FW_REJECT: + ret = ECONNREFUSED; break; + /* Hack to help diag; these only get + returned when testing. 
*/ + case FW_SKIP+1: + ret = ELOOP; break; + case FW_SKIP: + ret = ENFILE; break; + default: /* FW_BLOCK */ + ret = ETIMEDOUT; break; + } + } + } + FWC_READ_UNLOCK(&ip_fw_lock); + return ret; + } + + case IP_FW_MASQ_TIMEOUTS: { + ret = ip_fw_masq_timeouts(m, len); + } + break; + + case IP_FW_REPLACE: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = replace_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_APPEND: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) + ret = append_to_chain(chain, ip_fwkern); + } + break; + + case IP_FW_INSERT: { + struct ip_fwkernel *ip_fwkern; + struct ip_fwnew *new = m; + + if (len != sizeof(struct ip_fwnew) + || !check_label(new->fwn_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwn_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret)) + != NULL) + ret = insert_in_chain(chain, ip_fwkern, + new->fwn_rulenum); + } + break; + + case IP_FW_DELETE: { + struct ip_fwchange *new = m; + struct ip_fwkernel *ip_fwkern; + + if (len != sizeof(struct ip_fwchange) + || !check_label(new->fwc_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwc_label)) == NULL) + ret = ENOENT; + else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret)) + != NULL) { + ret = del_rule_from_chain(chain, ip_fwkern); + kfree(ip_fwkern); + } + } + break; + + case IP_FW_DELETE_NUM: { + struct ip_fwdelnum *new = m; + + if (len != sizeof(struct ip_fwdelnum) + || 
!check_label(new->fwd_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwd_label)) == NULL) + ret = ENOENT; + else ret = del_num_from_chain(chain, new->fwd_rulenum); + } + break; + + case IP_FW_CREATECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("create_chain: bad size %i\n", len); + ret = EINVAL; + } + else ret = create_chain(m); + } + break; + + case IP_FW_DELETECHAIN: { + if (len != sizeof(ip_chainlabel)) { + duprintf("delete_chain: bad size %i\n", len); + ret = EINVAL; + } + else ret = del_chain(m); + } + break; + + case IP_FW_POLICY: { + struct ip_fwpolicy *new = m; + + if (len != sizeof(struct ip_fwpolicy) + || !check_label(new->fwp_label)) + ret = EINVAL; + else if ((chain = find_label(new->fwp_label)) == NULL) + ret = ENOENT; + else if (chain != IP_FW_INPUT_CHAIN + && chain != IP_FW_FORWARD_CHAIN + && chain != IP_FW_OUTPUT_CHAIN) { + duprintf("change_policy: can't change policy on user" + " defined chain.\n"); + ret = EINVAL; + } + else { + int pol = FW_SKIP; + find_special(new->fwp_policy, &pol); + + switch(pol) { + case FW_MASQUERADE: + if (chain != IP_FW_FORWARD_CHAIN) { + ret = EINVAL; + break; + } + /* Fall thru... 
*/ + case FW_BLOCK: + case FW_ACCEPT: + case FW_REJECT: + ret = change_policy(chain, pol); + break; + default: + duprintf("change_policy: bad policy `%s'\n", + new->fwp_policy); + ret = EINVAL; + } + } + break; + } + default: + duprintf("ip_fw_ctl: unknown request %d\n",cmd); + ret = ENOPROTOOPT; + } + + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; +} + +/* Returns bytes used - doesn't NUL terminate */ +static int dump_rule(char *buffer, + const char *chainlabel, + const struct ip_fwkernel *rule) +{ + int len; + unsigned int i; + __u64 packets = 0, bytes = 0; + + FWC_HAVE_LOCK(fwc_wlocks); + for (i = 0; i < NUM_SLOTS; i++) { + packets += rule->counters[i].pcnt; + bytes += rule->counters[i].bcnt; + } + + len=sprintf(buffer, + "%9s " /* Chain name */ + "%08X/%08X->%08X/%08X " /* Source & Destination IPs */ + "%.16s " /* Interface */ + "%X %X " /* fw_flg and fw_invflg fields */ + "%u " /* Protocol */ + "%-9u %-9u %-9u %-9u " /* Packet & byte counters */ + "%u-%u %u-%u " /* Source & Dest port ranges */ + "A%02X X%02X " /* TOS and and xor masks */ + "%08X " /* Redirection port */ + "%u " /* fw_mark field */ + "%u " /* output size */ + "%9s\n", /* Target */ + chainlabel, + ntohl(rule->ipfw.fw_src.s_addr), + ntohl(rule->ipfw.fw_smsk.s_addr), + ntohl(rule->ipfw.fw_dst.s_addr), + ntohl(rule->ipfw.fw_dmsk.s_addr), + (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-", + rule->ipfw.fw_flg, + rule->ipfw.fw_invflg, + rule->ipfw.fw_proto, + (__u32)(packets >> 32), (__u32)packets, + (__u32)(bytes >> 32), (__u32)bytes, + rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1], + rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1], + rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor, + rule->ipfw.fw_redirpt, + rule->ipfw.fw_mark, + rule->ipfw.fw_outputsize, + branchname(rule->branch,rule->simplebranch)); + + duprintf("dump_rule: %i bytes done.\n", len); + return len; +} + +/* File offset is actually in records, not bytes. 
*/ +static int ip_chain_procinfo(char *buffer, char **start, + off_t offset, int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. Wonderful 8-( --RR */ + int reset = 0; +#endif + struct ip_chain *i; + struct ip_fwkernel *j = ip_fw_chains->chain; + unsigned long flags; + int len = 0; + int last_len = 0; + off_t upto = 0; + + duprintf("Offset starts at %lu\n", offset); + duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains); + + /* Need a write lock to lock out ``readers'' which update counters. */ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) { + for (j = i->chain; j; j = j->next) { + if (upto == offset) break; + duprintf("Skipping rule in chain `%s'\n", + i->label); + upto++; + } + if (upto == offset) break; + } + + /* Don't init j first time, or once i = NULL */ + for (; i; (void)((i = i->next) && (j = i->chain))) { + duprintf("Dumping chain `%s'\n", i->label); + for (; j; j = j->next, upto++, last_len = len) + { + len += dump_rule(buffer+len, i->label, j); + if (len > length) { + duprintf("Dumped to %i (past %i). " + "Moving back to %i.\n", + len, length, last_len); + len = last_len; + goto outside; + } + else if (reset) + memset(j->counters, 0, + sizeof(struct ip_counters)*NUM_SLOTS); + } + } +outside: + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + buffer[len] = '\0'; + + duprintf("ip_chain_procinfo: Length = %i (of %i). Offset = %li.\n", + len, length, upto); + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start=(char *)((unsigned int)upto-offset); + return len; +} + +static int ip_chain_name_procinfo(char *buffer, char **start, + off_t offset, int length) +{ + struct ip_chain *i; + int len = 0,last_len = 0; + off_t pos = 0,begin = 0; + unsigned long flags; + + /* Need a write lock to lock out ``readers'' which update counters. 
*/ + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + for (i = ip_fw_chains; i; i = i->next) + { + unsigned int j; + __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0; + + for (j = 0; j < NUM_SLOTS; j++) { + packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF; + packetsHi += ((i->reent[j].counters.pcnt >> 32) + & 0xFFFFFFFF); + bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF; + bytesHi += ((i->reent[j].counters.bcnt >> 32) + & 0xFFFFFFFF); + } + + /* print the label and the policy */ + len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n", + i->label,branchname(NULL, i->policy),i->refcount, + packetsHi, packetsLo, bytesHi, bytesLo); + pos=begin+len; + if(posoffset+length) { + len = last_len; + break; + } + + last_len = len; + } + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + + *start = buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} + +/* + * Interface to the generic firewall chains. + */ +int ipfw_input_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_output_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + /* Locally generated bogus packets by root. . 
*/ + if (((struct iphdr *)phdr)->ihl * 4 < sizeof(struct iphdr) + || (*pskb)->len < sizeof(struct iphdr)) + return FW_ACCEPT; + return ip_fw_check(phdr, dev->name, + arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +int ipfw_forward_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + return ip_fw_check(phdr, dev->name, + arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0); +} + +struct firewall_ops ipfw_ops= +{ + NULL, + ipfw_forward_check, + ipfw_input_check, + ipfw_output_check, + NULL, + NULL +}; + +int ipfw_init_or_cleanup(int init) +{ + int ret = 0; + unsigned long flags; + + FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags); + + if (!init) goto cleanup; + +#ifdef DEBUG_IP_FIREWALL_LOCKING + fwc_wlocks = fwc_rlocks = 0; +#endif + +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); + if (ipfwsk == NULL) + goto cleanup_nothing; +#endif + + ret = register_firewall(PF_INET, &ipfw_ops); + if (ret < 0) + goto cleanup_netlink; + + proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, ip_chain_procinfo); + proc_net_create(IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, ip_chain_name_procinfo); + + IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT); + IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT); + IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT); + + FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; + + cleanup: + while (ip_fw_chains) { + struct ip_chain *next = ip_fw_chains->next; + + clear_fw_chain(ip_fw_chains); + kfree(ip_fw_chains); + ip_fw_chains = next; + } + + proc_net_remove(IP_FW_PROC_CHAINS); + proc_net_remove(IP_FW_PROC_CHAIN_NAMES); + + unregister_firewall(PF_INET, &ipfw_ops); + + cleanup_netlink: +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + sock_release(ipfwsk->socket); + + cleanup_nothing: +#endif + 
FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags); + return ret; +} diff --git a/net/ipv4/netfilter/ipfwadm_core.c b/net/ipv4/netfilter/ipfwadm_core.c new file mode 100644 index 000000000000..a1f4e16cf4ff --- /dev/null +++ b/net/ipv4/netfilter/ipfwadm_core.c @@ -0,0 +1,1410 @@ +/* Minor modifications to fit on compatibility framework: + Rusty.Russell@rustcorp.com.au +*/ + +#define CONFIG_IP_FIREWALL +#define CONFIG_IP_FIREWALL_VERBOSE +#define CONFIG_IP_MASQUERADE +#define CONFIG_IP_ACCT +#define CONFIG_IP_TRANSPARENT_PROXY +#define CONFIG_IP_FIREWALL_NETLINK + +/* + * IP firewalling code. This is taken from 4.4BSD. Please note the + * copyright message below. As per the GPL it must be maintained + * and the licenses thus do not conflict. While this port is subject + * to the GPL I also place my modifications under the original + * license in recognition of the original copyright. + * -- Alan Cox. + * + * $Id: ipfwadm_core.c,v 1.1 2000/03/17 14:42:00 davem Exp $ + * + * Ported from BSD to Linux, + * Alan Cox 22/Nov/1994. + * Zeroing /proc and other additions + * Jos Vos 4/Feb/1995. + * Merged and included the FreeBSD-Current changes at Ugen's request + * (but hey it's a lot cleaner now). Ugen would prefer in some ways + * we waited for his final product but since Linux 1.2.0 is about to + * appear it's not practical - Read: It works, it's not clean but please + * don't consider it to be his standard of finished work. + * Alan Cox 12/Feb/1995 + * Porting bidirectional entries from BSD, fixing accounting issues, + * adding struct ip_fwpkt for checking packets with interface address + * Jos Vos 5/Mar/1995. + * Established connections (ACK check), ACK check on bidirectional rules, + * ICMP type check. + * Wilfred Mollenvanger 7/7/1995. + * TCP attack protection. + * Alan Cox 25/8/95, based on information from bugtraq. 
+ * ICMP type printk, IP_FW_F_APPEND + * Bernd Eckenfels 1996-01-31 + * Split blocking chain into input and output chains, add new "insert" and + * "append" commands to replace semi-intelligent "add" command, let "delete". + * only delete the first matching entry, use 0xFFFF (0xFF) as ports (ICMP + * types) when counting packets being 2nd and further fragments. + * Jos Vos 8/2/1996. + * Add support for matching on device names. + * Jos Vos 15/2/1996. + * Transparent proxying support. + * Willy Konynenberg 10/5/96. + * Make separate accounting on incoming and outgoing packets possible. + * Jos Vos 18/5/1996. + * Added trap out of bad frames. + * Alan Cox 17/11/1996 + * + * + * Masquerading functionality + * + * Copyright (c) 1994 Pauline Middelink + * + * The pieces which added masquerading functionality are totally + * my responsibility and have nothing to with the original authors + * copyright or doing. + * + * Parts distributed under GPL. + * + * Fixes: + * Pauline Middelink : Added masquerading. + * Alan Cox : Fixed an error in the merge. + * Thomas Quinot : Fixed port spoofing. + * Alan Cox : Cleaned up retransmits in spoofing. + * Alan Cox : Cleaned up length setting. + * Wouter Gadeyne : Fixed masquerading support of ftp PORT commands + * + * Juan Jose Ciarlante : Masquerading code moved to ip_masq.c + * Andi Kleen : Print frag_offsets and the ip flags properly. + * + * All the real work was done by ..... + * + */ + + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Implement IP packet firewall + */ + +#ifdef DEBUG_IP_FIREWALL +#define dprintf1(a) printk(a) +#define dprintf2(a1,a2) printk(a1,a2) +#define dprintf3(a1,a2,a3) printk(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) printk(a1,a2,a3,a4) +#else +#define dprintf1(a) +#define dprintf2(a1,a2) +#define dprintf3(a1,a2,a3) +#define dprintf4(a1,a2,a3,a4) +#endif + +#define print_ip(a) printk("%d.%d.%d.%d",(ntohl(a)>>24)&0xFF,\ + (ntohl(a)>>16)&0xFF,\ + (ntohl(a)>>8)&0xFF,\ + (ntohl(a))&0xFF); + +#ifdef DEBUG_IP_FIREWALL +#define dprint_ip(a) print_ip(a) +#else +#define dprint_ip(a) +#endif + +#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + +struct ip_fw *ip_fw_fwd_chain; +struct ip_fw *ip_fw_in_chain; +struct ip_fw *ip_fw_out_chain; +struct ip_fw *ip_acct_chain; +struct ip_fw *ip_masq_chain; + +static struct ip_fw **chains[] = + {&ip_fw_fwd_chain, &ip_fw_in_chain, &ip_fw_out_chain, &ip_acct_chain, + &ip_masq_chain + }; +#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */ + +#ifdef CONFIG_IP_FIREWALL +int ip_fw_fwd_policy=IP_FW_F_ACCEPT; +int ip_fw_in_policy=IP_FW_F_ACCEPT; +int ip_fw_out_policy=IP_FW_F_ACCEPT; + +static int *policies[] = + {&ip_fw_fwd_policy, &ip_fw_in_policy, &ip_fw_out_policy}; + +#endif + +#ifdef CONFIG_IP_FIREWALL_NETLINK +struct sock *ipfwsk; +#endif + +/* + * Returns 1 if the port is matched by the vector, 0 otherwise + */ + +extern inline int port_match(unsigned short *portptr,int nports,unsigned short port,int range_flag) +{ + if (!nports) + return 1; + if ( range_flag ) + { + if ( portptr[0] <= port && port <= portptr[1] ) + { + return( 1 ); + } + nports -= 2; + portptr += 2; + } + while ( nports-- > 0 ) + { + if ( *portptr++ == 
port ) + { + return( 1 ); + } + } + return(0); +} + +#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + +#ifdef CONFIG_IP_FIREWALL_VERBOSE + +/* + * VERY ugly piece of code which actually makes kernel printf for + * matching packets. + */ + +static char *chain_name(struct ip_fw *chain, int mode) +{ + switch (mode) { + case IP_FW_MODE_ACCT_IN: return "acct in"; + case IP_FW_MODE_ACCT_OUT: return "acct out"; + default: + if (chain == ip_fw_fwd_chain) + return "fw-fwd"; + else if (chain == ip_fw_in_chain) + return "fw-in"; + else + return "fw-out"; + } +} + +static char *rule_name(struct ip_fw *f, int mode, char *buf) +{ + if (mode == IP_FW_MODE_ACCT_IN || mode == IP_FW_MODE_ACCT_OUT) + return ""; + + if(f->fw_flg&IP_FW_F_ACCEPT) { + if(f->fw_flg&IP_FW_F_REDIR) { + sprintf(buf, "acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]); + return buf; + } else if(f->fw_flg&IP_FW_F_MASQ) + return "acc/masq "; + else + return "acc "; + } else if(f->fw_flg&IP_FW_F_ICMPRPL) { + return "rej "; + } else { + return "deny "; + } +} + +static void print_packet(struct iphdr *ip, + u16 src_port, u16 dst_port, u16 icmp_type, + char *chain, char *rule, char *devname) +{ + __u32 *opt = (__u32 *) (ip + 1); + int opti; + __u16 foff = ntohs(ip->frag_off); + + printk(KERN_INFO "IP %s %s%s", chain, rule, devname); + + switch(ip->protocol) + { + case IPPROTO_TCP: + printk(" TCP "); + break; + case IPPROTO_UDP: + printk(" UDP "); + break; + case IPPROTO_ICMP: + printk(" ICMP/%d ", icmp_type); + break; + default: + printk(" PROTO=%d ", ip->protocol); + break; + } + print_ip(ip->saddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%hu", src_port); + printk(" "); + print_ip(ip->daddr); + if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP) + printk(":%hu", dst_port); + printk(" L=%hu S=0x%2.2hX I=%hu FO=0x%4.4hX T=%hu", + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + foff & IP_OFFSET, ip->ttl); + if (foff & IP_DF) printk(" DF=1"); + if (foff & IP_MF) 
printk(" MF=1"); + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk("\n"); +} +#endif + +/* + * Returns one of the generic firewall policies, like FW_ACCEPT. + * Also does accounting so you can feed it the accounting chain. + * + * The modes is either IP_FW_MODE_FW (normal firewall mode), + * IP_FW_MODE_ACCT_IN or IP_FW_MODE_ACCT_OUT (accounting mode, + * steps through the entire chain and handles fragments + * differently), or IP_FW_MODE_CHK (handles user-level check, + * counters are not updated). + */ + + +int ip_fw_chk(struct iphdr *ip, struct net_device *rif, __u16 *redirport, + struct ip_fw *chain, int policy, int mode) +{ + struct ip_fw *f; + struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); + struct udphdr *udp=(struct udphdr *)((__u32 *)ip+ip->ihl); + struct icmphdr *icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl); + __u32 src, dst; + __u16 src_port=0xFFFF, dst_port=0xFFFF, icmp_type=0xFF; + unsigned short f_prt=0, prt; + char notcpsyn=0, notcpack=0, match; + unsigned short offset; + int answer; + unsigned char tosand, tosxor; + + /* + * If the chain is empty follow policy. The BSD one + * accepts anything giving you a time window while + * flushing and rebuilding the tables. + */ + + src = ip->saddr; + dst = ip->daddr; + + /* + * This way we handle fragmented packets. + * we ignore all fragments but the first one + * so the whole packet can't be reassembled. + * This way we relay on the full info which + * stored only in first packet. + * + * Note that this theoretically allows partial packet + * spoofing. Not very dangerous but paranoid people may + * wish to play with this. It also allows the so called + * "fragment bomb" denial of service attack on some types + * of system. + */ + + offset = ntohs(ip->frag_off) & IP_OFFSET; + + /* + * Don't allow a fragment of TCP 8 bytes in. Nobody + * normal causes this. 
Its a cracker trying to break + * in by doing a flag overwrite to pass the direction + * checks. + */ + + if (offset == 1 && ip->protocol == IPPROTO_TCP) + return FW_BLOCK; + + if (offset!=0 && !(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT)) && + (ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP || + ip->protocol == IPPROTO_ICMP)) + return FW_ACCEPT; + + /* + * Header fragment for TCP is too small to check the bits. + */ + + if(ip->protocol==IPPROTO_TCP && (ip->ihl<<2)+16 > ntohs(ip->tot_len)) + return FW_BLOCK; + + /* + * Too short. + * + * But only too short for a packet with ports... + */ + + else if((ntohs(ip->tot_len)<8+(ip->ihl<<2))&&(ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP)) + return FW_BLOCK; + + src = ip->saddr; + dst = ip->daddr; + + /* + * If we got interface from which packet came + * we can use the address directly. This is unlike + * 4.4BSD derived systems that have an address chain + * per device. We have a device per address with dummy + * devices instead. 
+ */ + + dprintf1("Packet "); + switch(ip->protocol) + { + case IPPROTO_TCP: + dprintf1("TCP "); + /* ports stay 0xFFFF if it is not the first fragment */ + if (!offset) { + src_port=ntohs(tcp->source); + dst_port=ntohs(tcp->dest); + if(!tcp->ack && !tcp->rst) + /* We do NOT have ACK, value TRUE */ + notcpack=1; + if(!tcp->syn || !notcpack) + /* We do NOT have SYN, value TRUE */ + notcpsyn=1; + } + prt=IP_FW_F_TCP; + break; + case IPPROTO_UDP: + dprintf1("UDP "); + /* ports stay 0xFFFF if it is not the first fragment */ + if (!offset) { + src_port=ntohs(udp->source); + dst_port=ntohs(udp->dest); + } + prt=IP_FW_F_UDP; + break; + case IPPROTO_ICMP: + /* icmp_type stays 255 if it is not the first fragment */ + if (!offset) + icmp_type=(__u16)(icmp->type); + dprintf2("ICMP:%d ",icmp_type); + prt=IP_FW_F_ICMP; + break; + default: + dprintf2("p=%d ",ip->protocol); + prt=IP_FW_F_ALL; + break; + } +#ifdef DEBUG_IP_FIREWALL + dprint_ip(ip->saddr); + + if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) + /* This will print 65535 when it is not the first fragment! */ + dprintf2(":%d ", src_port); + dprint_ip(ip->daddr); + if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP) + /* This will print 65535 when it is not the first fragment! */ + dprintf2(":%d ",dst_port); + dprintf1("\n"); +#endif + + for (f=chain;f;f=f->fw_next) + { + /* + * This is a bit simpler as we don't have to walk + * an interface chain as you do in BSD - same logic + * however. + */ + + /* + * Match can become 0x01 (a "normal" match was found), + * 0x02 (a reverse match was found), and 0x03 (the + * IP addresses match in both directions). + * Now we know in which direction(s) we should look + * for a match for the TCP/UDP ports. Both directions + * might match (e.g., when both addresses are on the + * same network for which an address/mask is given), but + * the ports might only match in one direction. + * This was obviously wrong in the original BSD code. 
+ */ + match = 0x00; + + if ((src&f->fw_smsk.s_addr)==f->fw_src.s_addr + && (dst&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) + /* normal direction */ + match |= 0x01; + + if ((f->fw_flg & IP_FW_F_BIDIR) && + (dst&f->fw_smsk.s_addr)==f->fw_src.s_addr + && (src&f->fw_dmsk.s_addr)==f->fw_dst.s_addr) + /* reverse direction */ + match |= 0x02; + + if (!match) + continue; + + /* + * Look for a VIA device match + */ + if(f->fw_viadev) + { + if(rif!=f->fw_viadev) + continue; /* Mismatch */ + } + + /* This looks stupid, because we scan almost static + list, searching for static key. However, this way seems + to be only reasonable way of handling fw_via rules + (btw bsd makes the same thing). + + It will not affect performance if you will follow + the following simple rules: + + - if inteface is aliased, ALWAYS specify fw_viadev, + so that previous check will guarantee, that we will + not waste time when packet arrive on another interface. + + - avoid using fw_via.s_addr if fw_via.s_addr is owned + by an aliased interface. + + --ANK + */ + if (f->fw_via.s_addr && rif) { + struct in_ifaddr *ifa; + + if (rif->ip_ptr == NULL) + continue; /* Mismatch */ + + for (ifa = ((struct in_device*)(rif->ip_ptr))->ifa_list; + ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_local == f->fw_via.s_addr) + goto ifa_ok; + } + continue; /* Mismatch */ + + ifa_ok: + } + + /* + * Ok the chain addresses match. + */ + +#ifdef CONFIG_IP_ACCT + /* + * See if we're in accounting mode and only want to + * count incoming or outgoing packets. + */ + + if (mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT) && + ((mode == IP_FW_MODE_ACCT_IN && f->fw_flg&IP_FW_F_ACCTOUT) || + (mode == IP_FW_MODE_ACCT_OUT && f->fw_flg&IP_FW_F_ACCTIN))) + continue; + +#endif + /* + * For all non-TCP packets and/or non-first fragments, + * notcpsyn and notcpack will always be FALSE, + * so the IP_FW_F_TCPSYN and IP_FW_F_TCPACK flags + * are actually ignored for these packets. 
+ */ + + if((f->fw_flg&IP_FW_F_TCPSYN) && notcpsyn) + continue; + + if((f->fw_flg&IP_FW_F_TCPACK) && notcpack) + continue; + + f_prt=f->fw_flg&IP_FW_F_KIND; + if (f_prt!=IP_FW_F_ALL) + { + /* + * Specific firewall - packet's protocol + * must match firewall's. + */ + + if(prt!=f_prt) + continue; + + if((prt==IP_FW_F_ICMP && + ! port_match(&f->fw_pts[0], f->fw_nsp, + icmp_type,f->fw_flg&IP_FW_F_SRNG)) || + !(prt==IP_FW_F_ICMP || ((match & 0x01) && + port_match(&f->fw_pts[0], f->fw_nsp, src_port, + f->fw_flg&IP_FW_F_SRNG) && + port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, dst_port, + f->fw_flg&IP_FW_F_DRNG)) || ((match & 0x02) && + port_match(&f->fw_pts[0], f->fw_nsp, dst_port, + f->fw_flg&IP_FW_F_SRNG) && + port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, src_port, + f->fw_flg&IP_FW_F_DRNG)))) + { + continue; + } + } + +#ifdef CONFIG_IP_FIREWALL_VERBOSE + if (f->fw_flg & IP_FW_F_PRN) + { + char buf[16]; + + print_packet(ip, src_port, dst_port, icmp_type, + chain_name(chain, mode), + rule_name(f, mode, buf), + rif ? rif->name : "-"); + } +#endif + if (mode != IP_FW_MODE_CHK) { + f->fw_bcnt+=ntohs(ip->tot_len); + f->fw_pcnt++; + } + if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT))) + break; + } /* Loop */ + + if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT))) { + + /* + * We rely on policy defined in the rejecting entry or, if no match + * was found, we rely on the general policy variable for this type + * of firewall. + */ + + if (f!=NULL) { + policy=f->fw_flg; + tosand=f->fw_tosand; + tosxor=f->fw_tosxor; + } else { + tosand=0xFF; + tosxor=0x00; + } + + if (policy&IP_FW_F_ACCEPT) { + /* Adjust priority and recompute checksum */ + __u8 old_tos = ip->tos; + ip->tos = (old_tos & tosand) ^ tosxor; + if (ip->tos != old_tos) + ip_send_check(ip); +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (policy&IP_FW_F_REDIR) { + if (redirport) + if ((*redirport = htons(f->fw_pts[f->fw_nsp+f->fw_ndp])) == 0) { + /* Wildcard redirection. 
+ * Note that redirport will become + * 0xFFFF for non-TCP/UDP packets. + */ + *redirport = htons(dst_port); + } + answer = FW_REDIRECT; + } else +#endif +#ifdef CONFIG_IP_MASQUERADE + if (policy&IP_FW_F_MASQ) + answer = FW_MASQUERADE; + else +#endif + answer = FW_ACCEPT; + + } else if(policy&IP_FW_F_ICMPRPL) + answer = FW_REJECT; + else + answer = FW_BLOCK; + +#ifdef CONFIG_IP_FIREWALL_NETLINK + if((policy&IP_FW_F_PRN) && (answer == FW_REJECT || answer == FW_BLOCK)) + { + struct sk_buff *skb=alloc_skb(128, GFP_ATOMIC); + if(skb) + { + int len=min(128,ntohs(ip->tot_len)); + skb_put(skb,len); + memcpy(skb->data,ip,len); + if(netlink_post(NETLINK_FIREWALL, skb)) + kfree_skb(skb); + } + } +#endif + return answer; + } else + /* we're doing accounting, always ok */ + return 0; +} + + +static void zero_fw_chain(struct ip_fw *chainptr) +{ + struct ip_fw *ctmp=chainptr; + while(ctmp) + { + ctmp->fw_pcnt=0L; + ctmp->fw_bcnt=0L; + ctmp=ctmp->fw_next; + } +} + +static void free_fw_chain(struct ip_fw *volatile* chainptr) +{ + unsigned long flags; + save_flags(flags); + cli(); + while ( *chainptr != NULL ) + { + struct ip_fw *ftmp; + ftmp = *chainptr; + *chainptr = ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + } + restore_flags(flags); +} + +/* Volatiles to keep some of the compiler versions amused */ + +static int insert_in_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len) +{ + struct ip_fw *ftmp; + unsigned long flags; + + save_flags(flags); + + ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC ); + if ( ftmp == NULL ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: malloc said no\n"); +#endif + return( ENOMEM ); + } + + memcpy(ftmp, frwl, len); + /* + * Allow the more recent "minimise cost" flag to be + * set. 
[Rob van Nieuwkerk] + */ + ftmp->fw_tosand |= 0x01; + ftmp->fw_tosxor &= 0xFE; + ftmp->fw_pcnt=0L; + ftmp->fw_bcnt=0L; + + cli(); + + if ((ftmp->fw_vianame)[0]) { + if (!(ftmp->fw_viadev = dev_get_by_name(ftmp->fw_vianame))) + ftmp->fw_viadev = (struct net_device *) -1; + } else + ftmp->fw_viadev = NULL; + + ftmp->fw_next = *chainptr; + *chainptr=ftmp; + restore_flags(flags); + return(0); +} + +static int append_to_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len) +{ + struct ip_fw *ftmp; + struct ip_fw *chtmp=NULL; + struct ip_fw *volatile chtmp_prev=NULL; + unsigned long flags; + + save_flags(flags); + + ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC ); + if ( ftmp == NULL ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: malloc said no\n"); +#endif + return( ENOMEM ); + } + + memcpy(ftmp, frwl, len); + /* + * Allow the more recent "minimise cost" flag to be + * set. [Rob van Nieuwkerk] + */ + ftmp->fw_tosand |= 0x01; + ftmp->fw_tosxor &= 0xFE; + ftmp->fw_pcnt=0L; + ftmp->fw_bcnt=0L; + + ftmp->fw_next = NULL; + + cli(); + + if ((ftmp->fw_vianame)[0]) { + if (!(ftmp->fw_viadev = dev_get_by_name(ftmp->fw_vianame))) + ftmp->fw_viadev = (struct net_device *) -1; + } else + ftmp->fw_viadev = NULL; + + chtmp_prev=NULL; + for (chtmp=*chainptr;chtmp!=NULL;chtmp=chtmp->fw_next) + chtmp_prev=chtmp; + + if (chtmp_prev) + chtmp_prev->fw_next=ftmp; + else + *chainptr=ftmp; + restore_flags(flags); + return(0); +} + +static int del_from_chain(struct ip_fw *volatile*chainptr, struct ip_fw *frwl) +{ + struct ip_fw *ftmp,*ltmp; + unsigned short tport1,tport2,tmpnum; + char matches,was_found; + unsigned long flags; + + save_flags(flags); + cli(); + + ftmp=*chainptr; + + if ( ftmp == NULL ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: chain is empty\n"); +#endif + restore_flags(flags); + return( EINVAL ); + } + + ltmp=NULL; + was_found=0; + + while( !was_found && ftmp != NULL ) + { + matches=1; + if (ftmp->fw_src.s_addr!=frwl->fw_src.s_addr + || 
ftmp->fw_dst.s_addr!=frwl->fw_dst.s_addr + || ftmp->fw_smsk.s_addr!=frwl->fw_smsk.s_addr + || ftmp->fw_dmsk.s_addr!=frwl->fw_dmsk.s_addr + || ftmp->fw_via.s_addr!=frwl->fw_via.s_addr + || ftmp->fw_flg!=frwl->fw_flg) + matches=0; + + tport1=ftmp->fw_nsp+ftmp->fw_ndp; + tport2=frwl->fw_nsp+frwl->fw_ndp; + if (tport1!=tport2) + matches=0; + else if (tport1!=0) + { + for (tmpnum=0;tmpnum < tport1 && tmpnum < IP_FW_MAX_PORTS;tmpnum++) + if (ftmp->fw_pts[tmpnum]!=frwl->fw_pts[tmpnum]) + matches=0; + } + if (strncmp(ftmp->fw_vianame, frwl->fw_vianame, IFNAMSIZ)) + matches=0; + if(matches) + { + was_found=1; + if (ltmp) + { + ltmp->fw_next=ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + ftmp=ltmp->fw_next; + } + else + { + *chainptr=ftmp->fw_next; + kfree_s(ftmp,sizeof(*ftmp)); + ftmp=*chainptr; + } + } + else + { + ltmp = ftmp; + ftmp = ftmp->fw_next; + } + } + restore_flags(flags); + if (was_found) + return 0; + else + return(EINVAL); +} + +#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */ + +struct ip_fw *check_ipfw_struct(struct ip_fw *frwl, int len) +{ + + if ( len != sizeof(struct ip_fw) ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: len=%d, want %d\n",len, sizeof(struct ip_fw)); +#endif + return(NULL); + } + + if ( (frwl->fw_flg & ~IP_FW_F_MASK) != 0 ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: undefined flag bits set (flags=%x)\n", + frwl->fw_flg); +#endif + return(NULL); + } + +#ifndef CONFIG_IP_TRANSPARENT_PROXY + if (frwl->fw_flg & IP_FW_F_REDIR) { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: unsupported flag IP_FW_F_REDIR\n"); +#endif + return(NULL); + } +#endif + +#ifndef CONFIG_IP_MASQUERADE + if (frwl->fw_flg & IP_FW_F_MASQ) { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: unsupported flag IP_FW_F_MASQ\n"); +#endif + return(NULL); + } +#endif + + if ( (frwl->fw_flg & IP_FW_F_SRNG) && frwl->fw_nsp < 2 ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: src range set but fw_nsp=%d\n", + frwl->fw_nsp); +#endif + return(NULL); + } + + if ( 
(frwl->fw_flg & IP_FW_F_DRNG) && frwl->fw_ndp < 2 ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: dst range set but fw_ndp=%d\n", + frwl->fw_ndp); +#endif + return(NULL); + } + + if ( frwl->fw_nsp + frwl->fw_ndp > (frwl->fw_flg & IP_FW_F_REDIR ? IP_FW_MAX_PORTS - 1 : IP_FW_MAX_PORTS) ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: too many ports (%d+%d)\n", + frwl->fw_nsp,frwl->fw_ndp); +#endif + return(NULL); + } + + return frwl; +} + + + + +#ifdef CONFIG_IP_ACCT + +int ip_acct_ctl(int stage, void *m, int len) +{ + if ( stage == IP_ACCT_FLUSH ) + { + free_fw_chain(&ip_acct_chain); + return(0); + } + if ( stage == IP_ACCT_ZERO ) + { + zero_fw_chain(ip_acct_chain); + return(0); + } + if ( stage == IP_ACCT_INSERT || stage == IP_ACCT_APPEND || + stage == IP_ACCT_DELETE ) + { + struct ip_fw *frwl; + + if (!(frwl=check_ipfw_struct(m,len))) + return (EINVAL); + + switch (stage) + { + case IP_ACCT_INSERT: + return( insert_in_chain(&ip_acct_chain,frwl,len)); + case IP_ACCT_APPEND: + return( append_to_chain(&ip_acct_chain,frwl,len)); + case IP_ACCT_DELETE: + return( del_from_chain(&ip_acct_chain,frwl)); + default: + /* + * Should be panic but... (Why ??? 
- AC) + */ +#ifdef DEBUG_IP_FIREWALL + printk("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } +#ifdef DEBUG_IP_FIREWALL + printk("ip_acct_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); +} +#endif + +#ifdef CONFIG_IP_FIREWALL +int ip_fw_ctl(int stage, void *m, int len) +{ + int cmd, fwtype; + + cmd = stage & IP_FW_COMMAND; + fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT; + + if ( cmd == IP_FW_FLUSH ) + { + free_fw_chain(chains[fwtype]); + return(0); + } + + if ( cmd == IP_FW_ZERO ) + { + zero_fw_chain(*chains[fwtype]); + return(0); + } + + if ( cmd == IP_FW_POLICY ) + { + int *tmp_policy_ptr; + tmp_policy_ptr=(int *)m; + *policies[fwtype] = *tmp_policy_ptr; + return 0; + } + + if ( cmd == IP_FW_CHECK ) + { + struct net_device *viadev; + struct ip_fwpkt *ipfwp; + struct iphdr *ip; + + if ( len != sizeof(struct ip_fwpkt) ) + { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: length=%d, expected %d\n", + len, sizeof(struct ip_fwpkt)); +#endif + return( EINVAL ); + } + + ipfwp = (struct ip_fwpkt *)m; + ip = &(ipfwp->fwp_iph); + + if ( !(viadev = dev_get_by_name(ipfwp->fwp_vianame)) ) { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: invalid device \"%s\"\n", ipfwp->fwp_vianame); +#endif + return(EINVAL); + } else if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) { +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl, + sizeof(struct iphdr)/sizeof(int)); +#endif + return(EINVAL); + } + + switch (ip_fw_chk(ip, viadev, NULL, *chains[fwtype], + *policies[fwtype], IP_FW_MODE_CHK)) + { + case FW_ACCEPT: + return(0); + case FW_REDIRECT: + return(ECONNABORTED); + case FW_MASQUERADE: + return(ECONNRESET); + case FW_REJECT: + return(ECONNREFUSED); + default: /* FW_BLOCK */ + return(ETIMEDOUT); + } + } + + if ( cmd == IP_FW_MASQ_TIMEOUTS ) + return ip_fw_masq_timeouts(m, len); + +/* + * Here we really working hard-adding new elements + * to blocking/forwarding chains or deleting 'em + */ + + if ( cmd == 
IP_FW_INSERT || cmd == IP_FW_APPEND || cmd == IP_FW_DELETE ) + { + struct ip_fw *frwl; + int fwtype; + + frwl=check_ipfw_struct(m,len); + if (frwl==NULL) + return (EINVAL); + fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT; + + switch (cmd) + { + case IP_FW_INSERT: + return(insert_in_chain(chains[fwtype],frwl,len)); + case IP_FW_APPEND: + return(append_to_chain(chains[fwtype],frwl,len)); + case IP_FW_DELETE: + return(del_from_chain(chains[fwtype],frwl)); + default: + /* + * Should be panic but... (Why are BSD people panic obsessed ??) + */ +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(EINVAL); + } + } + +#ifdef DEBUG_IP_FIREWALL + printk("ip_fw_ctl: unknown request %d\n",stage); +#endif + return(ENOPROTOOPT); +} +#endif /* CONFIG_IP_FIREWALL */ + +#ifdef CONFIG_PROC_FS +#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) + +static int ip_chain_procinfo(int stage, char *buffer, char **start, + off_t offset, int length, int reset) +{ + off_t pos=0, begin=0; + struct ip_fw *i; + unsigned long flags; + int len, p; + int last_len = 0; + + + switch(stage) + { +#ifdef CONFIG_IP_FIREWALL + case IP_FW_IN: + i = ip_fw_in_chain; + len=sprintf(buffer, "IP firewall input rules, default %d\n", + ip_fw_in_policy); + break; + case IP_FW_OUT: + i = ip_fw_out_chain; + len=sprintf(buffer, "IP firewall output rules, default %d\n", + ip_fw_out_policy); + break; + case IP_FW_FWD: + i = ip_fw_fwd_chain; + len=sprintf(buffer, "IP firewall forward rules, default %d\n", + ip_fw_fwd_policy); + break; +#endif +#ifdef CONFIG_IP_ACCT + case IP_FW_ACCT: + i = ip_acct_chain; + len=sprintf(buffer,"IP accounting rules\n"); + break; +#endif + default: + /* this should never be reached, but safety first... 
*/ + i = NULL; + len=0; + break; + } + + save_flags(flags); + cli(); + + while(i!=NULL) + { + len+=sprintf(buffer+len,"%08X/%08X->%08X/%08X %.16s %08X %X ", + ntohl(i->fw_src.s_addr),ntohl(i->fw_smsk.s_addr), + ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr), + (i->fw_vianame)[0] ? i->fw_vianame : "-", + ntohl(i->fw_via.s_addr), i->fw_flg); + /* 10 is enough for a 32 bit box but the counters are 64bit on + the Alpha and Ultrapenguin */ + len+=sprintf(buffer+len,"%u %u %-20lu %-20lu", + i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt); + for (p = 0; p < IP_FW_MAX_PORTS; p++) + len+=sprintf(buffer+len, " %u", i->fw_pts[p]); + len+=sprintf(buffer+len, " A%02X X%02X", i->fw_tosand, i->fw_tosxor); + buffer[len++]='\n'; + buffer[len]='\0'; + pos=begin+len; + if(posoffset+length) + { + len = last_len; + break; + } + else if(reset) + { + /* This needs to be done at this specific place! */ + i->fw_pcnt=0L; + i->fw_bcnt=0L; + } + last_len = len; + i=i->fw_next; + } + restore_flags(flags); + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} +#endif + +#ifdef CONFIG_IP_ACCT + +static int ip_acct_procinfo(char *buffer, char **start, off_t offset, + int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. Wonderful 8-( --RR */ + int reset = 0; +#endif + return ip_chain_procinfo(IP_FW_ACCT, buffer,start, offset,length, + reset); +} + +#endif + +#ifdef CONFIG_IP_FIREWALL + +static int ip_fw_in_procinfo(char *buffer, char **start, off_t offset, + int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. 
Wonderful 8-( --RR */ + int reset = 0; +#endif + return ip_chain_procinfo(IP_FW_IN, buffer,start,offset,length, + reset); +} + +static int ip_fw_out_procinfo(char *buffer, char **start, off_t offset, + int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. Wonderful 8-( --RR */ + int reset = 0; +#endif + return ip_chain_procinfo(IP_FW_OUT, buffer,start,offset,length, + reset); +} + +static int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset, + int length +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,29) + , int reset +#endif + ) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,29) + /* FIXME: No more `atomic' read and reset. Wonderful 8-( --RR */ + int reset = 0; +#endif + return ip_chain_procinfo(IP_FW_FWD, buffer,start,offset,length, + reset); +} +#endif +#endif + + +#ifdef CONFIG_IP_FIREWALL +/* + * Interface to the generic firewall chains. + */ + +int ipfw_input_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + return ip_fw_chk(phdr, dev, arg, ip_fw_in_chain, ip_fw_in_policy, + IP_FW_MODE_FW); +} + +int ipfw_output_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + return ip_fw_chk(phdr, dev, arg, ip_fw_out_chain, ip_fw_out_policy, + IP_FW_MODE_FW); +} + +int ipfw_forward_check(struct firewall_ops *this, int pf, + struct net_device *dev, void *phdr, void *arg, + struct sk_buff **pskb) +{ + return ip_fw_chk(phdr, dev, arg, ip_fw_fwd_chain, ip_fw_fwd_policy, + IP_FW_MODE_FW); +} + +#ifdef CONFIG_IP_ACCT +int ipfw_acct_in(struct firewall_ops *this, int pf, struct net_device *dev, + void *phdr, void *arg, struct sk_buff **pskb) +{ + return ip_fw_chk(phdr,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_IN); +} + +int ipfw_acct_out(struct firewall_ops *this, int pf, struct net_device *dev, + void 
*phdr, void *arg, struct sk_buff **pskb) +{ + return ip_fw_chk(phdr,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT); +} +#endif + +struct firewall_ops ipfw_ops= +{ + NULL, + ipfw_forward_check, + ipfw_input_check, + ipfw_output_check, +#ifdef CONFIG_IP_ACCT + ipfw_acct_in, + ipfw_acct_out +#else + NULL, + NULL +#endif +}; + +#endif + +#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL) + +int ipfw_device_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev=ptr; + char *devname = dev->name; + unsigned long flags; + struct ip_fw *fw; + int chn; + + save_flags(flags); + cli(); + + if (event == NETDEV_UP) { + for (chn = 0; chn < IP_FW_CHAINS; chn++) + for (fw = *chains[chn]; fw; fw = fw->fw_next) + if ((fw->fw_vianame)[0] && !strncmp(devname, + fw->fw_vianame, IFNAMSIZ)) + fw->fw_viadev = dev; + } else if (event == NETDEV_DOWN) { + for (chn = 0; chn < IP_FW_CHAINS; chn++) + for (fw = *chains[chn]; fw; fw = fw->fw_next) + /* we could compare just the pointers ... 
*/ + if ((fw->fw_vianame)[0] && !strncmp(devname, + fw->fw_vianame, IFNAMSIZ)) + fw->fw_viadev = (struct net_device*)-1; + } + + restore_flags(flags); + return NOTIFY_DONE; +} + +static struct notifier_block ipfw_dev_notifier={ + ipfw_device_event, + NULL, + 0 +}; + +#endif + +int ipfw_init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) + goto cleanup; + + ret = register_firewall(PF_INET, &ipfw_ops); + if (ret < 0) + goto cleanup_nothing; + +#ifdef CONFIG_IP_ACCT + proc_net_create("ip_acct", S_IFREG | S_IRUGO | S_IWUSR, ip_acct_procinfo); +#endif + proc_net_create("ip_input", S_IFREG | S_IRUGO | S_IWUSR, ip_fw_in_procinfo); + proc_net_create("ip_output", S_IFREG | S_IRUGO | S_IWUSR, ip_fw_out_procinfo); + proc_net_create("ip_forward", S_IFREG | S_IRUGO | S_IWUSR, ip_fw_fwd_procinfo); + + /* Register for device up/down reports */ + register_netdevice_notifier(&ipfw_dev_notifier); + +#ifdef CONFIG_IP_FIREWALL_NETLINK + ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); +#endif + return ret; + + cleanup: +#ifdef CONFIG_IP_FIREWALL_NETLINK + sock_release(ipfwsk->socket); +#endif + unregister_netdevice_notifier(&ipfw_dev_notifier); + +#ifdef CONFIG_IP_ACCT + proc_net_remove("ip_acct"); +#endif + proc_net_remove("ip_input"); + proc_net_remove("ip_output"); + proc_net_remove("ip_forward"); + + free_fw_chain(chains[IP_FW_FWD]); + free_fw_chain(chains[IP_FW_IN]); + free_fw_chain(chains[IP_FW_OUT]); + free_fw_chain(chains[IP_FW_ACCT]); + + unregister_firewall(PF_INET, &ipfw_ops); + + cleanup_nothing: + return ret; +} diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c new file mode 100644 index 000000000000..b5585201d954 --- /dev/null +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -0,0 +1,369 @@ +/* + * This is a module which is used for logging packets. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct in_device; +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct esphdr { + __u32 spi; +}; /* FIXME evil kludge */ + +/* Make init and cleanup non-static, so gcc doesn't warn about unused, + but don't export the symbols */ +EXPORT_NO_SYMBOLS; + +/* Use lock to serialize, so printks don't overlap */ +static spinlock_t log_lock = SPIN_LOCK_UNLOCKED; + +/* One level of recursion won't kill us */ +static void dump_packet(const struct ipt_log_info *info, + struct iphdr *iph, unsigned int len, int recurse) +{ + void *protoh = (u_int32_t *)iph + iph->ihl; + unsigned int datalen = len - iph->ihl * 4; + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ + /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ + printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", + (ntohl(iph->saddr)>>24)&0xFF, + (ntohl(iph->saddr)>>16)&0xFF, + (ntohl(iph->saddr)>>8)&0xFF, + (ntohl(iph->saddr))&0xFF, + (ntohl(iph->daddr)>>24)&0xFF, + (ntohl(iph->daddr)>>16)&0xFF, + (ntohl(iph->daddr)>>8)&0xFF, + (ntohl(iph->daddr))&0xFF); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ + printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(iph->tot_len), iph->tos & IPTOS_TOS_MASK, + iph->tos & IPTOS_PREC_MASK, iph->ttl, ntohs(iph->id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(iph->frag_off) & IP_CE) + printk("CE "); + if (ntohs(iph->frag_off) & IP_DF) + printk("DF "); + if (ntohs(iph->frag_off) & IP_MF) + printk("MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(iph->frag_off) & IP_OFFSET) + printk("FRAG:%u ", ntohs(iph->frag_off) & IP_OFFSET); + + if ((info->logflags & IPT_LOG_IPOPT) + && iph->ihl * 4 != sizeof(struct iphdr)) { + unsigned int i; + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + printk("OPT ("); + for (i = sizeof(struct iphdr); i < iph->ihl * 
4; i++) + printk("%02X", ((u_int8_t *)iph)[i]); + printk(") "); + } + + switch (iph->protocol) { + case IPPROTO_TCP: { + struct tcphdr *tcph = protoh; + + /* Max length: 10 "PROTO=TCP " */ + printk("PROTO=TCP "); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < sizeof (*tcph)) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + printk("SPT=%u DPT=%u ", + ntohs(tcph->source), ntohs(tcph->dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (info->logflags & IPT_LOG_TCPSEQ) + printk("SEQ=%u ACK=%u ", + ntohl(tcph->seq), ntohl(tcph->ack_seq)); + /* Max length: 13 "WINDOW=65535 " */ + printk("WINDOW=%u ", ntohs(tcph->window)); + /* Max length: 9 "RES=0x3F " */ + printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(tcph) & TCP_RESERVED_BITS) >> 22)); + /* Max length: 36 "URG ACK PSH RST SYN FIN " */ + if (tcph->urg) + printk("URG "); + if (tcph->ack) + printk("ACK "); + if (tcph->psh) + printk("PSH "); + if (tcph->rst) + printk("RST "); + if (tcph->syn) + printk("SYN "); + if (tcph->fin) + printk("FIN "); + /* Max length: 11 "URGP=65535 " */ + printk("URGP=%u ", ntohs(tcph->urg_ptr)); + + if ((info->logflags & IPT_LOG_TCPOPT) + && tcph->doff * 4 != sizeof(struct tcphdr)) { + unsigned int i; + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + printk("OPT ("); + for (i =sizeof(struct tcphdr); i < tcph->doff * 4; i++) + printk("%02X", ((u_int8_t *)tcph)[i]); + printk(") "); + } + break; + } + case IPPROTO_UDP: { + struct udphdr *udph = protoh; + + /* Max length: 10 "PROTO=UDP " */ + printk("PROTO=UDP "); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < sizeof (*udph)) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + printk("SPT=%u DPT=%u LEN=%u ", + ntohs(udph->source), ntohs(udph->dest), + 
ntohs(udph->len)); + break; + } + case IPPROTO_ICMP: { + struct icmphdr *icmph = protoh; + static size_t required_len[NR_ICMP_TYPES+1] + = { [ICMP_ECHOREPLY] = 4, + [ICMP_DEST_UNREACH] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_SOURCE_QUENCH] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_REDIRECT] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_ECHO] = 4, + [ICMP_TIME_EXCEEDED] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_PARAMETERPROB] + = 8 + sizeof(struct iphdr) + 8, + [ICMP_TIMESTAMP] = 20, + [ICMP_TIMESTAMPREPLY] = 20, + [ICMP_ADDRESS] = 12, + [ICMP_ADDRESSREPLY] = 12 }; + + /* Max length: 11 "PROTO=ICMP " */ + printk("PROTO=ICMP "); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < 4) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + printk("TYPE=%u CODE=%u ", icmph->type, icmph->code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (icmph->type <= NR_ICMP_TYPES + && required_len[icmph->type] + && datalen < required_len[icmph->type]) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + switch (icmph->type) { + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + printk("ID=%u SEQ=%u ", + ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ + printk("PARAMETER=%u ", + ntohl(icmph->un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ + printk("GATEWAY=%u.%u.%u.%u ", + (ntohl(icmph->un.gateway)>>24)&0xFF, + (ntohl(icmph->un.gateway)>>16)&0xFF, + (ntohl(icmph->un.gateway)>>8)&0xFF, + (ntohl(icmph->un.gateway))&0xFF); + /* Fall through */ + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (recurse) { + printk("["); + dump_packet(info, + (struct iphdr *)(icmph + 1), + datalen-sizeof(struct iphdr), + 0); + 
printk("] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (icmph->type == ICMP_DEST_UNREACH + && icmph->code == ICMP_FRAG_NEEDED) + printk("MTU=%u ", ntohs(icmph->un.frag.mtu)); + } + break; + } + /* Max Length */ + case IPPROTO_AH: + case IPPROTO_ESP: { + struct esphdr *esph = protoh; + int esp= (iph->protocol==IPPROTO_ESP); + + /* Max length: 10 "PROTO=ESP " */ + printk("PROTO=%s ",esp? "ESP" : "AH"); + + if (ntohs(iph->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (datalen < sizeof (*esph)) { + printk("INCOMPLETE [%u bytes] ", datalen); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + printk("SPI=0x%x ", ntohl(esph->spi) ); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: + printk("PROTO=%u ", iph->protocol); + } + + /* Proto Max log string length */ + /* IP: 40+46+6+11+127 = 230 */ + /* TCP: 10+max(25,20+30+13+9+36+11+127) = 256 */ + /* UDP: 10+max(25,20) = 35 */ + /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ + /* ESP: 10+max(25)+15 = 50 */ + /* AH: 9+max(25)+15 = 49 */ + /* unknown: 10 */ + + /* (ICMP allows recursion one level deep) */ + /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ + /* maxlen = 230+ 91 + 230 + 256 = 807 */ +} + +static unsigned int +ipt_log_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct iphdr *iph = (*pskb)->nh.iph; + const struct ipt_log_info *loginfo = targinfo; + char level_string[4] = "< >"; + + level_string[1] = '0' + (loginfo->level % 8); + spin_lock_bh(&log_lock); + printk(level_string); + printk("%sIN=%s OUT=%s ", + loginfo->prefix, + in ? in->name : "", + out ? out->name : ""); + if (in && !out) { + /* MAC logging for input chain only. 
*/ + printk("MAC="); + if ((*pskb)->dev && (*pskb)->dev->hard_header_len) { + int i; + unsigned char *p = (*pskb)->mac.raw; + for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++) + printk("%02x%c", *p, + i==(*pskb)->dev->hard_header_len - 1 + ? ' ':':'); + } + } + + dump_packet(loginfo, iph, (*pskb)->len, 1); + printk("\n"); + spin_unlock_bh(&log_lock); + + return IPT_CONTINUE; +} + +static int ipt_log_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ipt_log_info *loginfo = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_log_info))) { + DEBUGP("LOG: targinfosize %u != %u\n", + targinfosize, IPT_ALIGN(sizeof(struct ipt_log_info))); + return 0; + } + + if (loginfo->level >= 8) { + DEBUGP("LOG: level %u >= 8\n", loginfo->level); + return 0; + } + + if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { + DEBUGP("LOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix)-1]); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_log_reg += { { NULL, NULL }, "LOG", ipt_log_target, ipt_log_checkentry, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_log_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_log_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c new file mode 100644 index 000000000000..32906eefe2ab --- /dev/null +++ b/net/ipv4/netfilter/ipt_MARK.c @@ -0,0 +1,68 @@ +/* This is a module which is used for setting the NFMARK field of an skb. 
*/ +#include +#include +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_mark_target_info *markinfo = targinfo; + + if((*pskb)->nfmark != markinfo->mark) { + (*pskb)->nfmark = markinfo->mark; + (*pskb)->nfcache |= NFC_ALTERED; + } + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { + printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_mark_target_info))); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_mark_reg += { { NULL, NULL }, "MARK", target, checkentry, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_mark_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_mark_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c new file mode 100644 index 000000000000..9f94f8f443aa --- /dev/null +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -0,0 +1,171 @@ +/* Masquerade. Simple mapping which alters range to a local IP address + (depending on route). */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* Lock protects masq region inside conntrack */ +static DECLARE_RWLOCK(masq_lock); + +/* FIXME: Multiple targets. 
--RR */ +static int +masquerade_check(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip_nat_multi_range *mr = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(*mr))) { + DEBUGP("masquerade_check: size %u != %u.\n", + targinfosize, sizeof(*mr)); + return 0; + } + if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) { + DEBUGP("masquerade_check: bad hooks %x.\n", hook_mask); + return 0; + } + if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + DEBUGP("masquerade_check: bad MAP_IPS.\n"); + return 0; + } + if (mr->rangesize != 1) { + DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize); + return 0; + } + return 1; +} + +static unsigned int +masquerade_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + const struct ip_nat_range *r; + struct ip_nat_multi_range newrange; + u_int32_t newsrc; + struct rtable *rt; + + IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); + + /* FIXME: For the moment, don't do local packets, breaks + testsuite for 2.3.49 --RR */ + if ((*pskb)->sk) + return NF_ACCEPT; + + ct = ip_conntrack_get(*pskb, &ctinfo); + IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW + || ctinfo == IP_CT_RELATED)); + + r = targinfo; + + if (ip_route_output(&rt, (*pskb)->nh.iph->daddr, + 0, + RT_TOS((*pskb)->nh.iph->tos)|RTO_CONN, + out->ifindex) != 0) { + /* Shouldn't happen */ + printk("MASQUERADE: No route: Rusty's brain broke!\n"); + return NF_DROP; + } + + newsrc = rt->rt_src; + DEBUGP("newsrc = %u.%u.%u.%u\n", IP_PARTS(newsrc)); + ip_rt_put(rt); + + WRITE_LOCK(&masq_lock); + ct->nat.masq_index = out->ifindex; + WRITE_UNLOCK(&masq_lock); + + /* Transfer from original range. 
*/ + newrange = ((struct ip_nat_multi_range) + { 1, { { r->flags | IP_NAT_RANGE_MAP_IPS, + newsrc, newsrc, + r->min, r->max } } }); + + /* Hand modified range to generic setup. */ + return ip_nat_setup_info(ct, &newrange, hooknum); +} + +static inline int +device_cmp(const struct ip_conntrack *i, void *ifindex) +{ + int ret; + + READ_LOCK(&masq_lock); + ret = (i->nat.masq_index == (int)(long)ifindex); + READ_UNLOCK(&masq_lock); + + return ret; +} + +int masq_device_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + + if (event == NETDEV_DOWN) { + /* Device was downed. Search entire table for + conntracks which were associated with that device, + and forget them. */ + IP_NF_ASSERT(dev->ifindex != 0); + + ip_ct_selective_cleanup(device_cmp, (void *)(long)dev->ifindex); + } + + return NOTIFY_DONE; +} + +static struct notifier_block masq_dev_notifier = { + masq_device_event, + NULL, + 0 +}; + +static struct ipt_target masquerade += { { NULL, NULL }, "MASQUERADE", masquerade_target, masquerade_check, + THIS_MODULE }; + +static int __init init(void) +{ + int ret; + + ret = ipt_register_target(&masquerade); + + if (ret == 0) { + /* Register for device down reports */ + register_netdevice_notifier(&masq_dev_notifier); + } + + return ret; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&masquerade); + unregister_netdevice_notifier(&masq_dev_notifier); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c new file mode 100644 index 000000000000..9dec181c1eab --- /dev/null +++ b/net/ipv4/netfilter/ipt_MIRROR.c @@ -0,0 +1,131 @@ +/* + This is a module which is used for resending packets with inverted src and dst. + + Based on code from: ip_nat_dumb.c,v 1.9 1999/08/20 + and various sources. 
+ + Copyright (C) 2000 Emmanuel Roger + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ +#include +#include +#include +#include +#include +#include +#include +struct in_device; +#include +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int route_mirror(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct rtable *rt; + + if (ip_route_output(&rt, iph->daddr, iph->saddr, + RT_TOS(iph->tos) | RTO_CONN, + 0)) { + return -EINVAL; + } + /* check if the interface we are living by is the same as the one we arrived on */ + + if (skb->rx_dev != rt->u.dst.dev) { + /* Drop old route. 
*/ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + return 0; + } + else return -EINVAL; +} + +static int +ip_rewrite(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + u32 odaddr = iph->saddr; + u32 osaddr = iph->daddr; + + skb->nfcache |= NFC_ALTERED; + + /* Rewrite IP header */ + iph->daddr = odaddr; + iph->saddr = osaddr; + + return 0; +} + + +static unsigned int ipt_mirror_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + if ((*pskb)->dst != NULL) { + if (!ip_rewrite(*pskb) && !route_mirror(*pskb)) { + ip_send(*pskb); + return NF_STOLEN; + } + } + return NF_DROP; +} + +static int ipt_mirror_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + /* Only on INPUT, FORWARD or PRE_ROUTING, otherwise loop danger. */ + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_IN))) { + DEBUGP("MIRROR: bad hook\n"); + return 0; + } + + if (targinfosize != IPT_ALIGN(0)) { + DEBUGP("MIRROR: targinfosize %u != 0\n", targinfosize); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_mirror_reg += { { NULL, NULL }, "MIRROR", ipt_mirror_target, ipt_mirror_checkentry, + THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&ipt_mirror_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_mirror_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c new file mode 100644 index 000000000000..690d3a8a1a4f --- /dev/null +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -0,0 +1,104 @@ +/* Redirect. Simple mapping which alters dst to a local IP address. 
*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* FIXME: Take multiple ranges --RR */ +static int +redirect_check(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip_nat_multi_range *mr = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(*mr))) { + DEBUGP("redirect_check: size %u.\n", targinfosize); + return 0; + } + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) { + DEBUGP("redirect_check: bad hooks %x.\n", hook_mask); + return 0; + } + if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + DEBUGP("redirect_check: bad MAP_IPS.\n"); + return 0; + } + if (mr->rangesize != 1) { + DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize); + return 0; + } + return 1; +} + +static unsigned int +redirect_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + u_int32_t newdst; + const struct ip_nat_range *r = targinfo; + struct ip_nat_multi_range newrange; + + IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING + || hooknum == NF_IP_LOCAL_OUT); + + ct = ip_conntrack_get(*pskb, &ctinfo); + IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + /* Local packets: make them go to loopback */ + if (hooknum == NF_IP_LOCAL_OUT) + newdst = htonl(0x7F000001); + else + /* Grab first address on interface. */ + newdst = (((struct in_device *)(*pskb)->dev->ip_ptr) + ->ifa_list->ifa_local); + + /* Transfer from original range. */ + newrange = ((struct ip_nat_multi_range) + { 1, { { r->flags | IP_NAT_RANGE_MAP_IPS, + newdst, newdst, + r->min, r->max } } }); + + /* Hand modified range to generic setup. 
*/ + return ip_nat_setup_info(ct, &newrange, hooknum); +} + +static struct ipt_target redirect_reg += { { NULL, NULL }, "REDIRECT", redirect_target, redirect_check, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&redirect_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&redirect_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c new file mode 100644 index 000000000000..b183e822cd45 --- /dev/null +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -0,0 +1,145 @@ +/* + * This is a module which is used for rejecting packets. + * Added support for customized reject packets (Jozsef Kadlecsik). + */ +#include +#include +#include +#include +#include +struct in_device; +#include +#include +#include +EXPORT_NO_SYMBOLS; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static unsigned int reject(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_reject_info *reject = targinfo; + + switch (reject->with) { + case IPT_ICMP_NET_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0); + break; + case IPT_ICMP_HOST_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + break; + case IPT_ICMP_PROT_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + break; + case IPT_ICMP_PORT_UNREACHABLE: + icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + break; + case IPT_ICMP_ECHOREPLY: { + struct icmphdr *icmph = (struct icmphdr *) + ((u_int32_t *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl); + unsigned int datalen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + + /* Not non-head frags, or truncated */ + if (((ntohs((*pskb)->nh.iph->frag_off) & IP_OFFSET) == 0) + && datalen >= 4) { + /* Usually I don't like cut & pasting code, + but dammit, my party is 
starting in 45 + mins! --RR */ + struct icmp_bxm icmp_param; + + icmp_param.icmph=*icmph; + icmp_param.icmph.type=ICMP_ECHOREPLY; + icmp_param.data_ptr=(icmph+1); + icmp_param.data_len=datalen; + icmp_reply(&icmp_param, *pskb); + } + } + break; + case IPT_TCP_RESET: + tcp_v4_send_reset(*pskb); + break; + } + + return NF_DROP; +} + +static inline int find_ping_match(const struct ipt_entry_match *m) +{ + const struct ipt_icmp *icmpinfo = (const struct ipt_icmp *)m->data; + + if (strcmp(m->u.match->name, "icmp") == 0 + && icmpinfo->type == ICMP_ECHO + && !(icmpinfo->invflags & IPT_ICMP_INV)) + return 1; + + return 0; +} + +static int check(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ipt_reject_info *rejinfo = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_icmp))) { + DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize); + return 0; + } + + /* Only allow these for packet filtering. */ + if ((hook_mask & ~((1 << NF_IP_LOCAL_IN) + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_OUT))) != 0) { + DEBUGP("REJECT: bad hook mask %X\n", hook_mask); + return 0; + } + + if (rejinfo->with == IPT_ICMP_ECHOREPLY) { + /* Must specify that it's an ICMP ping packet. */ + if (e->ip.proto != IPPROTO_ICMP + || (e->ip.invflags & IPT_INV_PROTO)) { + DEBUGP("REJECT: ECHOREPLY illegal for non-icmp\n"); + return 0; + } + /* Must contain ICMP match. 
*/ + if (IPT_MATCH_ITERATE(e, find_ping_match) == 0) { + DEBUGP("REJECT: ECHOREPLY illegal for non-ping\n"); + return 0; + } + } else if (rejinfo->with == IPT_TCP_RESET) { + if (e->ip.proto != IPPROTO_TCP + || (e->ip.invflags & IPT_INV_PROTO)) { + DEBUGP("REJECT: TCP_RESET illegal for non-tcp\n"); + return 0; + } + } + + return 1; +} + +static struct ipt_target ipt_reject_reg += { { NULL, NULL }, "REJECT", reject, check, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_reject_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_reject_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c new file mode 100644 index 000000000000..fbfb4974fe69 --- /dev/null +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -0,0 +1,87 @@ +/* This is a module which is used for setting the TOS field of a packet. */ +#include +#include +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct iphdr *iph = (*pskb)->nh.iph; + const struct ipt_tos_target_info *tosinfo = targinfo; + + if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { + u_int8_t diffs[2]; + + diffs[0] = iph->tos; + iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; + diffs[1] = iph->tos; + iph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + iph->check^0xFFFF)); + (*pskb)->nfcache |= NFC_ALTERED; + } + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) { + printk(KERN_WARNING "TOS: targinfosize %u != 
%Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_tos_target_info))); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (tos != IPTOS_LOWDELAY + && tos != IPTOS_THROUGHPUT + && tos != IPTOS_RELIABILITY + && tos != IPTOS_MINCOST + && tos != IPTOS_NORMALSVC) { + printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_tos_reg += { { NULL, NULL }, "TOS", target, checkentry, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_tos_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_tos_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c new file mode 100644 index 000000000000..3785ba371ecb --- /dev/null +++ b/net/ipv4/netfilter/ipt_limit.c @@ -0,0 +1,144 @@ +/* Kernel module to control the rate + * + * Jérôme de Vivie + * Hervé Eychenne + * + * 2 September 1999: Changed from the target RATE to the match + * `limit', removed logging. Did I mention that + * Alexey is a fucking genius? + * Rusty Russell (rusty@rustcorp.com.au). */ +#include +#include +#include +#include + +#include +#include +EXPORT_NO_SYMBOLS; + +#define IP_PARTS_NATIVE(n) \ +(unsigned int)((n)>>24)&0xFF, \ +(unsigned int)((n)>>16)&0xFF, \ +(unsigned int)((n)>>8)&0xFF, \ +(unsigned int)((n)&0xFF) + +#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n)) + +/* The algorithm used is the Simple Token Bucket Filter (TBF) + * see net/sched/sch_tbf.c in the linux source tree + */ + +static spinlock_t limit_lock = SPIN_LOCK_UNLOCKED; + +/* Rusty: This is my (non-mathematically-inclined) understanding of + this algorithm. The `average rate' in jiffies becomes your initial + amount of credit `credit' and the most credit you can ever have + `credit_cap'. 
The `peak rate' becomes the cost of passing the + test, `cost'. + + `prev' tracks the last packet hit: you gain one credit per jiffy. + If you get credit balance more than this, the extra credit is + discarded. Every time the match passes, you lose `cost' credits; + if you don't have that many, the test fails. + + See Alexey's formal explanation in net/sched/sch_tbf.c. + + To avoid underflow, we multiply by 128 (ie. you get 128 credits per + jiffy). Hence a cost of 2^32-1, means one pass per 32768 seconds + at 1024HZ (or one every 9 hours). A cost of 1 means 12800 passes + per second at 100HZ. */ + +#define CREDITS_PER_JIFFY 128 + +static int +ipt_limit_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + struct ipt_rateinfo *r = ((struct ipt_rateinfo *)matchinfo)->master; + unsigned long now = jiffies; + + spin_lock_bh(&limit_lock); + r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY; + if (r->credit > r->credit_cap) + r->credit = r->credit_cap; + + if (r->credit >= r->cost) { + /* We're not limited. */ + r->credit -= r->cost; + spin_unlock_bh(&limit_lock); + return 1; + } + + spin_unlock_bh(&limit_lock); + return 0; +} + +/* Precision saver. */ +static u_int32_t +user2credits(u_int32_t user) +{ + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + /* Divide first. */ + return (user / IPT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + + return (user * HZ * CREDITS_PER_JIFFY) / IPT_LIMIT_SCALE; +} + +static int +ipt_limit_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_rateinfo *r = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_rateinfo))) + return 0; + + /* Check for overflow. 
*/ + if (r->burst == 0 + || user2credits(r->avg * r->burst) < user2credits(r->avg)) { + printk("Call rusty: overflow in ipt_limit: %u/%u\n", + r->avg, r->burst); + return 0; + } + + /* User avg in seconds * IPT_LIMIT_SCALE: convert to jiffies * + 128. */ + r->prev = jiffies; + r->credit = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->cost = user2credits(r->avg); + + /* For SMP, we only want to use one set of counters. */ + r->master = r; + + return 1; +} + +static struct ipt_match ipt_limit_reg += { { NULL, NULL }, "limit", ipt_limit_match, ipt_limit_checkentry, + THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_match(&ipt_limit_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_match(&ipt_limit_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c new file mode 100644 index 000000000000..90dbec59d221 --- /dev/null +++ b/net/ipv4/netfilter/ipt_mac.c @@ -0,0 +1,63 @@ +/* Kernel module to match MAC address parameters. */ +#include +#include +#include + +#include +#include +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_mac_info *info = matchinfo; + + /* Is mac pointer valid? */ + return (skb->mac.raw >= skb->head + && skb->mac.raw < skb->head + skb->len - ETH_HLEN + /* If so, compare... 
*/ + && ((memcmp(skb->mac.ethernet->h_source, info->srcaddr, ETH_ALEN) + == 0) ^ info->invert)); +} + +static int +ipt_mac_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (hook_mask + & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN))) { + printk("ipt_mac: only valid for PRE_ROUTING or LOCAL_IN.\n"); + return 0; + } + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info))) + return 0; + + return 1; +} + +static struct ipt_match mac_match += { { NULL, NULL }, "mac", &match, &ipt_mac_checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&mac_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&mac_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c new file mode 100644 index 000000000000..0d828fd208d3 --- /dev/null +++ b/net/ipv4/netfilter/ipt_mark.c @@ -0,0 +1,52 @@ +/* Kernel module to match NFMARK values. 
*/ +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_mark_info *info = matchinfo; + + return ((skb->nfmark & info->mask) == info->mark) ^ info->invert; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) + return 0; + + return 1; +} + +static struct ipt_match mark_match += { { NULL, NULL }, "mark", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&mark_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&mark_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c new file mode 100644 index 000000000000..08cc4a968e6e --- /dev/null +++ b/net/ipv4/netfilter/ipt_multiport.c @@ -0,0 +1,102 @@ +/* Kernel module to match one of a list of TCP/UDP ports: ports are in + the same place so we can treat them as equal. */ +#include +#include +#include +#include + +#include +#include + +#if 0 +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +EXPORT_NO_SYMBOLS; + +/* Returns 1 if the port is matched by the test, 0 otherwise. */ +static inline int +ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags, + u_int8_t count, u_int16_t src, u_int16_t dst) +{ + unsigned int i; + for (i=0; iports, + multiinfo->flags, multiinfo->count, + ntohs(udp->source), ntohs(udp->dest)); +} + +/* Called when user tries to insert an entry of this type. 
*/ +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_multiport *multiinfo = matchinfo; + + /* Must specify proto == TCP/UDP, no unknown flags or bad count */ + return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP) + && !(ip->flags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_multiport)) + && (multiinfo->flags == IPT_MULTIPORT_SOURCE + || multiinfo->flags == IPT_MULTIPORT_DESTINATION + || multiinfo->flags == IPT_MULTIPORT_EITHER) + && multiinfo->count <= IPT_MULTI_PORTS; +} + +static struct ipt_match multiport_match += { { NULL, NULL }, "multiport", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&multiport_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&multiport_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c new file mode 100644 index 000000000000..5438571d3d35 --- /dev/null +++ b/net/ipv4/netfilter/ipt_owner.c @@ -0,0 +1,136 @@ +/* Kernel module to match various things tied to sockets associated with + locally generated outgoing packets. 
+ + (C)2000 Marc Boucher + */ +#include +#include +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static int +match_pid(const struct sk_buff *skb, pid_t pid) +{ + struct task_struct *p; + int i; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if(p && p->files) { + for (i=0; i < p->files->max_fds; i++) { + if (fcheck_task(p, i) == skb->sk->socket->file) { + read_unlock(&tasklist_lock); + return 1; + } + } + } + read_unlock(&tasklist_lock); + return 0; +} + +static int +match_sid(const struct sk_buff *skb, pid_t sid) +{ + struct task_struct *p; + int i, found=0; + + read_lock(&tasklist_lock); + for_each_task(p) { + if ((p->session != sid) || !p->files) + continue; + + for (i=0; i < p->files->max_fds; i++) { + if (fcheck_task(p, i) == skb->sk->socket->file) { + found = 1; + break; + } + } + if(found) + break; + } + read_unlock(&tasklist_lock); + + return found; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_owner_info *info = matchinfo; + + if (!skb->sk || !skb->sk->socket || !skb->sk->socket->file) + return 0; + + if(info->match & IPT_OWNER_UID) { + if((skb->sk->socket->file->f_uid != info->uid) ^ + !!(info->invert & IPT_OWNER_UID)) + return 0; + } + + if(info->match & IPT_OWNER_GID) { + if((skb->sk->socket->file->f_gid != info->gid) ^ + !!(info->invert & IPT_OWNER_GID)) + return 0; + } + + if(info->match & IPT_OWNER_PID) { + if (!match_pid(skb, info->pid) ^ + !!(info->invert & IPT_OWNER_PID)) + return 0; + } + + if(info->match & IPT_OWNER_SID) { + if (!match_sid(skb, info->sid) ^ + !!(info->invert & IPT_OWNER_SID)) + return 0; + } + + return 1; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (hook_mask + & ~((1 << NF_IP_LOCAL_OUT) | (1 << 
NF_IP_POST_ROUTING))) { + printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); + return 0; + } + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_owner_info))) + return 0; + + return 1; +} + +static struct ipt_match owner_match += { { NULL, NULL }, "owner", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&owner_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&owner_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c new file mode 100644 index 000000000000..1baa54d62f71 --- /dev/null +++ b/net/ipv4/netfilter/ipt_state.c @@ -0,0 +1,61 @@ +/* Kernel module to match connection tracking information. + * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). + */ +#include +#include +#include +#include +#include +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_state_info *sinfo = matchinfo; + enum ip_conntrack_info ctinfo; + unsigned int statebit; + + if (!ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) + statebit = IPT_STATE_INVALID; + else + statebit = IPT_STATE_BIT(ctinfo); + + return (sinfo->statemask & statebit); +} + +static int check(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_state_info))) + return 0; + + return 1; +} + +static struct ipt_match state_match += { { NULL, NULL }, "state", &match, &check, THIS_MODULE }; + +static int __init init(void) +{ + __MOD_INC_USE_COUNT(ip_conntrack_module); + return ipt_register_match(&state_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&state_match); + __MOD_DEC_USE_COUNT(ip_conntrack_module); +} + +module_init(init); 
+module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c new file mode 100644 index 000000000000..6da72b2d8eda --- /dev/null +++ b/net/ipv4/netfilter/ipt_tos.c @@ -0,0 +1,53 @@ +/* Kernel module to match TOS values. */ +#include +#include + +#include +#include + +EXPORT_NO_SYMBOLS; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_tos_info *info = matchinfo; + const struct iphdr *iph = skb->nh.iph; + + return (iph->tos == info->tos) ^ info->invert; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_tos_info))) + return 0; + + return 1; +} + +static struct ipt_match tos_match += { { NULL, NULL }, "tos", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&tos_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&tos_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_unclean.c b/net/ipv4/netfilter/ipt_unclean.c new file mode 100644 index 000000000000..056224a8785a --- /dev/null +++ b/net/ipv4/netfilter/ipt_unclean.c @@ -0,0 +1,576 @@ +/* Kernel module to match suspect packets. */ +#include +#include +#include +#include +#include +#include +#include + +#include + +EXPORT_NO_SYMBOLS; + +#define limpk(format, args...) \ +do { \ + if (net_ratelimit()) \ + printk("ipt_unclean: %s" format, \ + embedded ? 
"(embedded packet) " : "" , ## args); \ +} while(0) + +enum icmp_error_status +{ + ICMP_MAY_BE_ERROR, + ICMP_IS_ERROR, + ICMP_NOT_ERROR +}; + +struct icmp_info +{ + size_t min_len, max_len; + enum icmp_error_status err; + u_int8_t min_code, max_code; +}; + +static int +check_ip(struct iphdr *iph, size_t length, int embedded); + +/* ICMP-specific checks. */ +static int +check_icmp(const struct icmphdr *icmph, + u_int16_t datalen, + unsigned int offset, + int more_frags, + int embedded) +{ + static struct icmp_info info[] + = { [ICMP_ECHOREPLY] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_DEST_UNREACH] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 15 }, + [ICMP_SOURCE_QUENCH] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 0 }, + [ICMP_REDIRECT] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 3 }, + [ICMP_ECHO] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + /* Router advertisement. */ + [9] + = { 8, 8 + 255 * 8, ICMP_NOT_ERROR, 0, 0 }, + /* Router solicitation. */ + [10] + = { 8, 8, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_TIME_EXCEEDED] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 1 }, + [ICMP_PARAMETERPROB] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 1 }, + [ICMP_TIMESTAMP] + = { 20, 20, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_TIMESTAMPREPLY] + = { 20, 20, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_INFO_REQUEST] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_INFO_REPLY] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_ADDRESS] + = { 12, 12, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_ADDRESSREPLY] + = { 12, 12, ICMP_NOT_ERROR, 0, 0 } }; + + /* Can't do anything if it's a fragment. */ + if (!offset) + return 1; + + /* Must cover type and code. */ + if (datalen < 2) { + limpk("ICMP len=%u too short\n", datalen); + return 0; + } + + /* If not embedded. */ + if (!embedded) { + /* Bad checksum? Don't print, just drop. */ + if (!more_frags + && ip_compute_csum((unsigned char *) icmph, datalen) != 0) + return 0; + + /* CHECK: Truncated ICMP (even if first fragment). 
*/ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph->type].min_len != 0 + && datalen < info[icmph->type].min_len) { + limpk("ICMP type %u len %u too short\n", + icmph->type, datalen); + return 0; + } + + /* CHECK: Check within known error ICMPs. */ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph->type].err == ICMP_IS_ERROR) { + /* CHECK: Embedded packet must be at least + length of iph + 8 bytes. */ + struct iphdr *inner = (void *)icmph + 8; + + if (datalen - 8 < sizeof(struct iphdr)) { + limpk("ICMP error internal way too short\n"); + return 0; + } + if (datalen - 8 < inner->ihl*4 + 8) { + limpk("ICMP error internal too short\n"); + return 0; + } + if (!check_ip(inner, datalen - 8, 1)) + return 0; + } + } else { + /* CHECK: Can't embed ICMP unless known non-error. */ + if (icmph->type >= sizeof(info)/sizeof(struct icmp_info) + || info[icmph->type].err != ICMP_NOT_ERROR) { + limpk("ICMP type %u not embeddable\n", + icmph->type); + return 0; + } + } + + /* CHECK: Invalid ICMP codes. */ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && (icmph->code < info[icmph->type].min_code + || icmph->code > info[icmph->type].max_code)) { + limpk("ICMP type=%u code=%u\n", + icmph->type, icmph->code); + return 0; + } + + /* CHECK: Above maximum length. */ + if (icmph->type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph->type].max_len != 0 + && datalen > info[icmph->type].max_len) { + limpk("ICMP type=%u too long: %u bytes\n", + icmph->type, datalen); + return 0; + } + + switch (icmph->type) { + case ICMP_PARAMETERPROB: { + /* CHECK: Problem param must be within error packet's + * IP header. */ + struct iphdr *iph = (void *)icmph + 8; + u_int32_t arg = ntohl(icmph->un.gateway); + + if (icmph->code == 0) { + if ((arg >> 24) >= iph->ihl*4) { + limpk("ICMP PARAMETERPROB ptr = %u\n", + ntohl(icmph->un.gateway) >> 24); + return 0; + } + arg &= 0x00FFFFFF; + } + + /* CHECK: Rest must be zero. 
*/ + if (arg) { + limpk("ICMP PARAMETERPROB nonzero arg = %u\n", + arg); + return 0; + } + break; + } + + case ICMP_TIME_EXCEEDED: + case ICMP_SOURCE_QUENCH: + /* CHECK: Unused must be zero. */ + if (icmph->un.gateway != 0) { + limpk("ICMP type=%u unused = %u\n", + icmph->type, ntohl(icmph->un.gateway)); + return 0; + } + break; + } + + return 1; +} + +/* UDP-specific checks. */ +static int +check_udp(const struct iphdr *iph, + const struct udphdr *udph, + u_int16_t datalen, + unsigned int offset, + int more_frags, + int embedded) +{ + /* Can't do anything if it's a fragment. */ + if (!offset) + return 1; + + /* CHECK: Must cover UDP header. */ + if (datalen < sizeof(struct udphdr)) { + limpk("UDP len=%u too short\n", datalen); + return 0; + } + + /* Bad checksum? Don't print, just drop. */ + /* FIXME: SRC ROUTE packets won't match checksum --RR */ + if (!more_frags && !embedded + && csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, + csum_partial((char *)udph, datalen, 0)) != 0) + return 0; + + /* CHECK: Ports can't be zero. */ + if (!udph->source || !udph->dest) { + limpk("UDP zero ports %u/%u\n", + ntohs(udph->source), ntohs(udph->dest)); + return 0; + } + + if (!more_frags) { + if (!embedded) { + /* CHECK: UDP length must match. */ + if (ntohs(udph->len) != datalen) { + limpk("UDP len too short %u vs %u\n", + ntohs(udph->len), datalen); + return 0; + } + } else { + /* CHECK: UDP length be >= this truncated pkt. */ + if (ntohs(udph->len) < datalen) { + limpk("UDP len too long %u vs %u\n", + ntohs(udph->len), datalen); + return 0; + } + } + } else { + /* CHECK: UDP length must be > this frag's length. */ + if (ntohs(udph->len) <= datalen) { + limpk("UDP fragment len too short %u vs %u\n", + ntohs(udph->len), datalen); + return 0; + } + } + + return 1; +} + +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 + +/* TCP-specific checks. 
*/ +static int +check_tcp(const struct iphdr *iph, + const struct tcphdr *tcph, + u_int16_t datalen, + unsigned int offset, + int more_frags, + int embedded) +{ + u_int8_t *opt = (u_int8_t *)(tcph + 1); + u_int8_t tcpflags; + int end_of_options = 0; + size_t i; + + /* CHECK: Can't have offset=1: used to override TCP syn-checks. */ + /* In fact, this is caught below (offset < 516). */ + + /* Can't do anything if it's a fragment. */ + if (!offset) + return 1; + + /* CHECK: Smaller than minimal TCP hdr. */ + if (datalen < sizeof(struct tcphdr)) { + if (!embedded) { + limpk("Packet length %u < TCP header.\n", datalen); + return 0; + } + /* Must have ports available (datalen >= 8). */ + /* CHECK: TCP ports inside ICMP error */ + if (!tcph->source || !tcph->dest) { + limpk("Zero TCP ports %u/%u.\n", + htons(tcph->source), htons(tcph->dest)); + return 0; + } + return 1; + } + + /* CHECK: Smaller than actual TCP hdr. */ + if (datalen < tcph->doff * 4) { + if (!embedded) { + limpk("Packet length %u < actual TCP header.\n", + datalen); + return 0; + } else + return 1; + } + + /* Bad checksum? Don't print, just drop. */ + /* FIXME: SRC ROUTE packets won't match checksum --RR */ + if (!more_frags && !embedded + && csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_TCP, + csum_partial((char *)tcph, datalen, 0)) != 0) + return 0; + + /* CHECK: TCP ports non-zero */ + if (!tcph->source || !tcph->dest) { + limpk("Zero TCP ports %u/%u.\n", + htons(tcph->source), htons(tcph->dest)); + return 0; + } + + /* CHECK: TCP reserved bits zero. */ + if(tcp_flag_word(tcph) & TCP_RESERVED_BITS) { + limpk("TCP reserved bits not zero\n"); + return 0; + } + + /* CHECK: TCP flags. 
*/ + tcpflags = ((u_int8_t *)tcph)[13]; + if (tcpflags != TH_SYN + && tcpflags != (TH_SYN|TH_ACK) + && tcpflags != (TH_RST|TH_ACK) + && tcpflags != (TH_RST|TH_ACK|TH_PUSH) + && tcpflags != (TH_FIN|TH_ACK) + && tcpflags != TH_ACK + && tcpflags != (TH_ACK|TH_PUSH) + && tcpflags != (TH_ACK|TH_URG) + && tcpflags != (TH_ACK|TH_URG|TH_PUSH) + && tcpflags != (TH_FIN|TH_ACK|TH_PUSH) + && tcpflags != (TH_FIN|TH_ACK|TH_URG) + && tcpflags != (TH_FIN|TH_ACK|TH_URG|TH_PUSH)) { + limpk("TCP flags bad: %u\n", tcpflags); + return 0; + } + + for (i = sizeof(struct tcphdr); i < tcph->doff * 4; ) { + switch (opt[i]) { + case 0: + end_of_options = 1; + i++; + break; + case 1: + i++; + break; + default: + /* CHECK: options after EOO. */ + if (end_of_options) { + limpk("TCP option %u after end\n", + opt[i]); + return 0; + } + /* CHECK: options at tail. */ + else if (i+1 >= tcph->doff * 4) { + limpk("TCP option %u at tail\n", + opt[i]); + return 0; + } + /* CHECK: zero-length options. */ + else if (opt[i+1] == 0) { + limpk("TCP option %u 0 len\n", + opt[i]); + return 0; + } + /* CHECK: oversize options. */ + else if (opt[i+1] + i >= tcph->doff * 4) { + limpk("TCP option %u at %Zu too long\n", + (unsigned int) opt[i], i); + return 0; + } + } + } + + return 1; +} + +/* Returns 1 if ok */ +/* Standard IP checks. */ +static int +check_ip(struct iphdr *iph, size_t length, int embedded) +{ + u_int8_t *opt = (u_int8_t *)(iph + 1); + int end_of_options = 0; + void *protoh; + size_t datalen; + unsigned int i; + unsigned int offset; + + /* Should only happen for local outgoing raw-socket packets. */ + /* CHECK: length >= ip header. */ + if (length < sizeof(struct iphdr) || length < iph->ihl * 4) { + limpk("Packet length %Zu < IP header.\n", length); + return 0; + } + + offset = ntohs(iph->frag_off) & IP_OFFSET; + protoh = (void *)iph + iph->ihl * 4; + datalen = length - iph->ihl * 4; + + /* CHECK: Embedded fragment. 
*/ + if (embedded && offset) { + limpk("Embedded fragment.\n"); + return 0; + } + + for (i = sizeof(struct iphdr); i < iph->ihl * 4; ) { + switch (opt[i]) { + case 0: + end_of_options = 1; + i++; + break; + case 1: + i++; + break; + default: + /* CHECK: options after EOO. */ + if (end_of_options) { + limpk("IP option %u after end\n", + opt[i]); + return 0; + } + /* CHECK: options at tail. */ + else if (i+1 >= iph->ihl * 4) { + limpk("IP option %u at tail\n", + opt[i]); + return 0; + } + /* CHECK: zero-length options. */ + else if (opt[i+1] == 0) { + limpk("IP option %u 0 len\n", + opt[i]); + return 0; + } + /* CHECK: oversize options. */ + else if (opt[i+1] + i >= iph->ihl * 4) { + limpk("IP option %u at %u too long\n", + opt[i], i); + return 0; + } + } + } + + /* Fragment checks. */ + + /* CHECK: More fragments, but doesn't fill 8-byte boundary. */ + if ((ntohs(iph->frag_off) & IP_MF) + && (ntohs(iph->tot_len) % 8) != 0) { + limpk("Truncated fragment %u long.\n", ntohs(iph->tot_len)); + return 0; + } + + /* CHECK: Oversize fragment a-la Ping of Death. */ + if (offset * 8 + datalen > 65535) { + limpk("Oversize fragment to %u.\n", offset * 8); + return 0; + } + + /* CHECK: DF set and offset or MF set. */ + if ((ntohs(iph->frag_off) & IP_DF) + && (offset || (ntohs(iph->frag_off) & IP_MF))) { + limpk("DF set and offset=%u, MF=%u.\n", + offset, ntohs(iph->frag_off) & IP_MF); + return 0; + } + + /* CHECK: Zero-sized fragments. */ + if ((offset || (ntohs(iph->frag_off) & IP_MF)) + && datalen == 0) { + limpk("Zero size fragment offset=%u\n", offset); + return 0; + } + + /* Note: we can have even middle fragments smaller than this: + consider a large packet passing through a 600MTU then + 576MTU link: this gives a fragment of 24 data bytes. But + everyone packs fragments largest first, hence a fragment + can't START before 576 - MAX_IP_HEADER_LEN. 
*/ + + /* Used to be min-size 576: I recall Alan Cox saying ax25 goes + down to 128 (576 taken from RFC 791: All hosts must be + prepared to accept datagrams of up to 576 octets). Use 128 + here. */ +#define MIN_LIKELY_MTU 128 + /* CHECK: Min size of first frag = 128. */ + if ((ntohs(iph->frag_off) & IP_MF) + && offset == 0 + && ntohs(iph->tot_len) < MIN_LIKELY_MTU) { + limpk("First fragment size %u < %u\n", ntohs(iph->tot_len), + MIN_LIKELY_MTU); + return 0; + } + + /* CHECK: Min offset of frag = 128 - 60 (max IP hdr len). */ + if (offset && offset * 8 < MIN_LIKELY_MTU - 60) { + limpk("Fragment starts at %u < %u\n", offset * 8, + MIN_LIKELY_MTU-60); + return 0; + } + + /* CHECK: Protocol specification non-zero. */ + if (iph->protocol == 0) { + limpk("Zero protocol\n"); + return 0; + } + + /* Per-protocol checks. */ + switch (iph->protocol) { + case IPPROTO_ICMP: + return check_icmp(protoh, datalen, offset, + (ntohs(iph->frag_off) & IP_MF), + embedded); + + case IPPROTO_UDP: + return check_udp(iph, protoh, datalen, offset, + (ntohs(iph->frag_off) & IP_MF), + embedded); + + case IPPROTO_TCP: + return check_tcp(iph, protoh, datalen, offset, + (ntohs(iph->frag_off) & IP_MF), + embedded); + default: + /* Ignorance is bliss. */ + return 1; + } +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + return !check_ip(skb->nh.iph, skb->len, 0); +} + +/* Called when user tries to insert an entry of this type. 
*/ +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(0)) + return 0; + + return 1; +} + +static struct ipt_match unclean_match += { { NULL, NULL }, "unclean", &match, &checkentry, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&unclean_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&unclean_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c new file mode 100644 index 000000000000..b7172e8561bd --- /dev/null +++ b/net/ipv4/netfilter/iptable_filter.c @@ -0,0 +1,182 @@ +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ +#include +#include +#include + +#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)) + +/* Standard entry. 
*/ +struct ipt_standard +{ + struct ipt_entry entry; + struct ipt_standard_target target; +}; + +struct ipt_error_target +{ + struct ipt_entry_target target; + char errorname[IPT_FUNCTION_MAXNAMELEN]; +}; + +struct ipt_error +{ + struct ipt_entry entry; + struct ipt_error_target target; +}; + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +} initial_table __initdata += { { "filter", FILTER_VALID_HOOKS, 4, + sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + { [NF_IP_LOCAL_IN] 0, + [NF_IP_FORWARD] sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) * 2 }, + { [NF_IP_LOCAL_IN] 0, + [NF_IP_FORWARD] sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) * 2 }, + 0, NULL, { } }, + { + /* LOCAL_IN */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } }, + /* FORWARD */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } } + }, + /* ERROR */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_error), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_error_target), { IPT_ERROR_TARGET }, + { } }, + "ERROR" + } + } +}; + +static struct ipt_table packet_filter += { { NULL, NULL }, "filter", &initial_table.repl, + FILTER_VALID_HOOKS, RW_LOCK_UNLOCKED, NULL }; + +/* The work comes in here from 
netfilter.c. */ +static unsigned int +ipt_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk("ipt_hook: happy cracking.\n"); + return NF_ACCEPT; + } + + return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); +} + +static struct nf_hook_ops ipt_ops[] += { { { NULL, NULL }, ipt_hook, PF_INET, NF_IP_LOCAL_IN, NF_IP_PRI_FILTER }, + { { NULL, NULL }, ipt_hook, PF_INET, NF_IP_FORWARD, NF_IP_PRI_FILTER }, + { { NULL, NULL }, ipt_local_out_hook, PF_INET, NF_IP_LOCAL_OUT, + NF_IP_PRI_FILTER } +}; + +/* Default to no forward for security reasons. 
*/ +static int forward = NF_DROP; +MODULE_PARM(forward, "i"); + +static int __init init(void) +{ + int ret; + + if (forward < 0 || forward > NF_MAX_VERDICT) { + printk("iptables forward must be 0 or 1\n"); + return -EINVAL; + } + + /* Entry 1 is the FORWARD hook */ + initial_table.entries[1].target.verdict = -forward - 1; + + /* Register table */ + ret = ipt_register_table(&packet_filter); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + ret = nf_register_hook(&ipt_ops[2]); + if (ret < 0) + goto cleanup_hook1; + + return ret; + + cleanup_hook1: + nf_unregister_hook(&ipt_ops[1]); + cleanup_hook0: + nf_unregister_hook(&ipt_ops[0]); + cleanup_table: + ipt_unregister_table(&packet_filter); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ipt_ops[i]); + + ipt_unregister_table(&packet_filter); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c new file mode 100644 index 000000000000..be07c1eb020a --- /dev/null +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -0,0 +1,153 @@ +/* + * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ +#include +#include +#include + +#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) + +/* Standard entry. 
*/ +struct ipt_standard +{ + struct ipt_entry entry; + struct ipt_standard_target target; +}; + +struct ipt_error_target +{ + struct ipt_entry_target target; + char errorname[IPT_FUNCTION_MAXNAMELEN]; +}; + +struct ipt_error +{ + struct ipt_entry entry; + struct ipt_error_target target; +}; + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[2]; + struct ipt_error term; +} initial_table __initdata += { { "mangle", MANGLE_VALID_HOOKS, 3, + sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + 0, NULL, { } }, + { + /* PRE_ROUTING */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_standard_target), { "" }, { } }, + -NF_ACCEPT - 1 } } + }, + /* ERROR */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_error), + 0, { 0, 0 }, { } }, + { { sizeof(struct ipt_error_target), { IPT_ERROR_TARGET }, + { } }, + "ERROR" + } + } +}; + +static struct ipt_table packet_mangler += { { NULL, NULL }, "mangle", &initial_table.repl, + MANGLE_VALID_HOOKS, RW_LOCK_UNLOCKED, NULL }; + +/* The work comes in here from netfilter.c. 
*/ +static unsigned int +ipt_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk("ipt_hook: happy cracking.\n"); + return NF_ACCEPT; + } + + return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); +} + +static struct nf_hook_ops ipt_ops[] += { { { NULL, NULL }, ipt_hook, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_MANGLE }, + { { NULL, NULL }, ipt_local_out_hook, PF_INET, NF_IP_LOCAL_OUT, + NF_IP_PRI_MANGLE } +}; + +static int __init init(void) +{ + int ret; + + /* Register table */ + ret = ipt_register_table(&packet_mangler); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + return ret; + + cleanup_hook0: + nf_unregister_hook(&ipt_ops[0]); + cleanup_table: + ipt_unregister_table(&packet_mangler); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ipt_ops[i]); + + ipt_unregister_table(&packet_mangler); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4e649eded39f..c683f2f234fc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. 
* - * Version: $Id: route.c,v 1.81 2000/02/09 11:16:42 davem Exp $ + * Version: $Id: route.c,v 1.82 2000/03/17 14:41:52 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -1187,10 +1187,7 @@ ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, rth->rt_dst = daddr; rth->key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - rth->key.fwmark = skb->nfmark; - else - rth->key.fwmark = 0; + rth->key.fwmark = skb->nfmark; #endif rth->key.src = saddr; rth->rt_src = saddr; @@ -1269,10 +1266,7 @@ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, key.src = saddr; key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - key.fwmark = skb->nfmark; - else - key.fwmark = 0; + key.fwmark = skb->nfmark; #endif key.iif = dev->ifindex; key.oif = 0; @@ -1395,10 +1389,7 @@ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, rth->rt_dst = daddr; rth->key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - rth->key.fwmark = skb->nfmark; - else - rth->key.fwmark = 0; + rth->key.fwmark = skb->nfmark; #endif rth->key.src = saddr; rth->rt_src = saddr; @@ -1473,10 +1464,7 @@ local_input: rth->rt_dst = daddr; rth->key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (skb->nfreason == NF_REASON_FOR_ROUTING) - rth->key.fwmark = skb->nfmark; - else - rth->key.fwmark = 0; + rth->key.fwmark = skb->nfmark; #endif rth->key.src = saddr; rth->rt_src = saddr; @@ -1563,9 +1551,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, rth->key.iif == iif && rth->key.oif == 0 && #ifdef CONFIG_IP_ROUTE_FWMARK - rth->key.fwmark - == (skb->nfreason == NF_REASON_FOR_ROUTING - ? 
skb->nfmark : 0) && + rth->key.fwmark == skb->nfmark && #endif rth->key.tos == tos) { rth->u.dst.lastuse = jiffies; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 204f2557457f..1edee9f51bb8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.201 2000/03/08 19:36:42 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.202 2000/03/17 14:41:53 davem Exp $ * * IPv4 specific functions * @@ -72,8 +72,6 @@ extern int sysctl_ip_dynaddr; struct inode tcp_inode; struct socket *tcp_socket=&tcp_inode.u.socket_i; -static void tcp_v4_send_reset(struct sk_buff *skb); - void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); @@ -1059,7 +1057,7 @@ void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(struct sk_buff *skb) +void tcp_v4_send_reset(struct sk_buff *skb) { struct tcphdr *th = skb->h.th; struct tcphdr rth; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 818ad66caa0c..45d20856e77f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,7 +6,7 @@ * Pedro Roque * Alexey Kuznetsov * - * $Id: sit.c,v 1.35 2000/01/06 00:42:08 davem Exp $ + * $Id: sit.c,v 1.36 2000/03/17 14:42:08 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -388,6 +388,10 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len) skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#endif netif_rx(skb); read_unlock(&ipip6_lock); return 0; @@ -547,6 +551,11 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ip_select_ident(iph, &rt->u.dst); ip_send_check(iph); +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = 
NULL; +#endif + stats->tx_bytes += skb->len; stats->tx_packets++; ip_send(skb); diff --git a/net/netsyms.c b/net/netsyms.c index 48cd5b503653..c6745cafe673 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -229,6 +229,7 @@ EXPORT_SYMBOL(inet_del_protocol); EXPORT_SYMBOL(ip_route_output); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(icmp_reply); EXPORT_SYMBOL(ip_options_compile); EXPORT_SYMBOL(ip_options_undo); EXPORT_SYMBOL(arp_send); @@ -339,6 +340,7 @@ EXPORT_SYMBOL(tcp_sendmsg); EXPORT_SYMBOL(tcp_v4_rebuild_header); EXPORT_SYMBOL(tcp_v4_send_check); EXPORT_SYMBOL(tcp_v4_conn_request); +EXPORT_SYMBOL(tcp_v4_send_reset); EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_bucket_create); EXPORT_SYMBOL(__tcp_put_port); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 143d6e361f2c..31dedf1eaae7 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -66,7 +66,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct fw_head *head = (struct fw_head*)tp->root; struct fw_filter *f; #ifdef CONFIG_NETFILTER - u32 id = (skb->nfreason == NF_REASON_FOR_CLS_FW ? skb->nfmark : 0); + u32 id = skb->nfmark; #else u32 id = 0; #endif diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index ddc738fccaa1..947aede01905 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -224,14 +224,14 @@ used on the egress (might slow things for an iota) return fwres; } -/* after iptables */ +/* after ipt_filter */ static struct nf_hook_ops ing_ops = { { NULL, NULL}, ing_hook, PF_INET, NF_IP_PRE_ROUTING, - 1 + NF_IP_PRI_FILTER + 1 }; int ingress_init(struct Qdisc *sch,struct rtattr *opt) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 15a16c161319..cbe730b5d339 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
* - * Version: $Id: af_unix.c,v 1.89 2000/02/27 19:52:50 davem Exp $ + * Version: $Id: af_unix.c,v 1.90 2000/03/16 20:38:45 davem Exp $ * * Fixes: * Linus Torvalds : Assorted bug cures. -- 2.39.5